diff options
90 files changed, 7005 insertions, 4606 deletions
diff --git a/.travis/common/travis-ci.env b/.travis/common/travis-ci.env index ec8e2dd63..cffeb2e2b 100644 --- a/.travis/common/travis-ci.env +++ b/.travis/common/travis-ci.env | |||
| @@ -6,6 +6,8 @@ TRAVIS_BRANCH | |||
| 6 | TRAVIS_BUILD_ID | 6 | TRAVIS_BUILD_ID |
| 7 | TRAVIS_BUILD_NUMBER | 7 | TRAVIS_BUILD_NUMBER |
| 8 | TRAVIS_COMMIT | 8 | TRAVIS_COMMIT |
| 9 | TRAVIS_COMMIT_RANGE | ||
| 10 | TRAVIS_EVENT_TYPE | ||
| 9 | TRAVIS_JOB_ID | 11 | TRAVIS_JOB_ID |
| 10 | TRAVIS_JOB_NUMBER | 12 | TRAVIS_JOB_NUMBER |
| 11 | TRAVIS_REPO_SLUG | 13 | TRAVIS_REPO_SLUG |
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp index 874673c4e..4ce2d374e 100644 --- a/src/audio_core/stream.cpp +++ b/src/audio_core/stream.cpp | |||
| @@ -68,7 +68,7 @@ static void VolumeAdjustSamples(std::vector<s16>& samples) { | |||
| 68 | } | 68 | } |
| 69 | 69 | ||
| 70 | // Implementation of a volume slider with a dynamic range of 60 dB | 70 | // Implementation of a volume slider with a dynamic range of 60 dB |
| 71 | const float volume_scale_factor{std::exp(6.90775f * volume) * 0.001f}; | 71 | const float volume_scale_factor = volume == 0 ? 0 : std::exp(6.90775f * volume) * 0.001f; |
| 72 | for (auto& sample : samples) { | 72 | for (auto& sample : samples) { |
| 73 | sample = static_cast<s16>(sample * volume_scale_factor); | 73 | sample = static_cast<s16>(sample * volume_scale_factor); |
| 74 | } | 74 | } |
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 965c28787..f61bcd40d 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt | |||
| @@ -140,8 +140,6 @@ add_library(core STATIC | |||
| 140 | hle/kernel/svc_wrap.h | 140 | hle/kernel/svc_wrap.h |
| 141 | hle/kernel/thread.cpp | 141 | hle/kernel/thread.cpp |
| 142 | hle/kernel/thread.h | 142 | hle/kernel/thread.h |
| 143 | hle/kernel/timer.cpp | ||
| 144 | hle/kernel/timer.h | ||
| 145 | hle/kernel/vm_manager.cpp | 143 | hle/kernel/vm_manager.cpp |
| 146 | hle/kernel/vm_manager.h | 144 | hle/kernel/vm_manager.h |
| 147 | hle/kernel/wait_object.cpp | 145 | hle/kernel/wait_object.cpp |
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h index 0d6c85aed..90f276ee8 100644 --- a/src/core/hle/ipc_helpers.h +++ b/src/core/hle/ipc_helpers.h | |||
| @@ -217,6 +217,11 @@ private: | |||
| 217 | /// Push /// | 217 | /// Push /// |
| 218 | 218 | ||
| 219 | template <> | 219 | template <> |
| 220 | inline void ResponseBuilder::Push(s32 value) { | ||
| 221 | cmdbuf[index++] = static_cast<u32>(value); | ||
| 222 | } | ||
| 223 | |||
| 224 | template <> | ||
| 220 | inline void ResponseBuilder::Push(u32 value) { | 225 | inline void ResponseBuilder::Push(u32 value) { |
| 221 | cmdbuf[index++] = value; | 226 | cmdbuf[index++] = value; |
| 222 | } | 227 | } |
| @@ -235,6 +240,22 @@ inline void ResponseBuilder::Push(ResultCode value) { | |||
| 235 | } | 240 | } |
| 236 | 241 | ||
| 237 | template <> | 242 | template <> |
| 243 | inline void ResponseBuilder::Push(s8 value) { | ||
| 244 | PushRaw(value); | ||
| 245 | } | ||
| 246 | |||
| 247 | template <> | ||
| 248 | inline void ResponseBuilder::Push(s16 value) { | ||
| 249 | PushRaw(value); | ||
| 250 | } | ||
| 251 | |||
| 252 | template <> | ||
| 253 | inline void ResponseBuilder::Push(s64 value) { | ||
| 254 | Push(static_cast<u32>(value)); | ||
| 255 | Push(static_cast<u32>(value >> 32)); | ||
| 256 | } | ||
| 257 | |||
| 258 | template <> | ||
| 238 | inline void ResponseBuilder::Push(u8 value) { | 259 | inline void ResponseBuilder::Push(u8 value) { |
| 239 | PushRaw(value); | 260 | PushRaw(value); |
| 240 | } | 261 | } |
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 67674cd47..7a524ce5a 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp | |||
| @@ -18,7 +18,6 @@ | |||
| 18 | #include "core/hle/kernel/process.h" | 18 | #include "core/hle/kernel/process.h" |
| 19 | #include "core/hle/kernel/resource_limit.h" | 19 | #include "core/hle/kernel/resource_limit.h" |
| 20 | #include "core/hle/kernel/thread.h" | 20 | #include "core/hle/kernel/thread.h" |
| 21 | #include "core/hle/kernel/timer.h" | ||
| 22 | #include "core/hle/lock.h" | 21 | #include "core/hle/lock.h" |
| 23 | #include "core/hle/result.h" | 22 | #include "core/hle/result.h" |
| 24 | 23 | ||
| @@ -86,27 +85,12 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_ | |||
| 86 | } | 85 | } |
| 87 | } | 86 | } |
| 88 | 87 | ||
| 89 | /// The timer callback event, called when a timer is fired | ||
| 90 | static void TimerCallback(u64 timer_handle, int cycles_late) { | ||
| 91 | const auto proper_handle = static_cast<Handle>(timer_handle); | ||
| 92 | const auto& system = Core::System::GetInstance(); | ||
| 93 | SharedPtr<Timer> timer = system.Kernel().RetrieveTimerFromCallbackHandleTable(proper_handle); | ||
| 94 | |||
| 95 | if (timer == nullptr) { | ||
| 96 | LOG_CRITICAL(Kernel, "Callback fired for invalid timer {:016X}", timer_handle); | ||
| 97 | return; | ||
| 98 | } | ||
| 99 | |||
| 100 | timer->Signal(cycles_late); | ||
| 101 | } | ||
| 102 | |||
| 103 | struct KernelCore::Impl { | 88 | struct KernelCore::Impl { |
| 104 | void Initialize(KernelCore& kernel) { | 89 | void Initialize(KernelCore& kernel) { |
| 105 | Shutdown(); | 90 | Shutdown(); |
| 106 | 91 | ||
| 107 | InitializeSystemResourceLimit(kernel); | 92 | InitializeSystemResourceLimit(kernel); |
| 108 | InitializeThreads(); | 93 | InitializeThreads(); |
| 109 | InitializeTimers(); | ||
| 110 | } | 94 | } |
| 111 | 95 | ||
| 112 | void Shutdown() { | 96 | void Shutdown() { |
| @@ -122,9 +106,6 @@ struct KernelCore::Impl { | |||
| 122 | thread_wakeup_callback_handle_table.Clear(); | 106 | thread_wakeup_callback_handle_table.Clear(); |
| 123 | thread_wakeup_event_type = nullptr; | 107 | thread_wakeup_event_type = nullptr; |
| 124 | 108 | ||
| 125 | timer_callback_handle_table.Clear(); | ||
| 126 | timer_callback_event_type = nullptr; | ||
| 127 | |||
| 128 | named_ports.clear(); | 109 | named_ports.clear(); |
| 129 | } | 110 | } |
| 130 | 111 | ||
| @@ -146,11 +127,6 @@ struct KernelCore::Impl { | |||
| 146 | CoreTiming::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback); | 127 | CoreTiming::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback); |
| 147 | } | 128 | } |
| 148 | 129 | ||
| 149 | void InitializeTimers() { | ||
| 150 | timer_callback_handle_table.Clear(); | ||
| 151 | timer_callback_event_type = CoreTiming::RegisterEvent("TimerCallback", TimerCallback); | ||
| 152 | } | ||
| 153 | |||
| 154 | std::atomic<u32> next_object_id{0}; | 130 | std::atomic<u32> next_object_id{0}; |
| 155 | std::atomic<u64> next_process_id{Process::ProcessIDMin}; | 131 | std::atomic<u64> next_process_id{Process::ProcessIDMin}; |
| 156 | std::atomic<u64> next_thread_id{1}; | 132 | std::atomic<u64> next_thread_id{1}; |
| @@ -161,12 +137,6 @@ struct KernelCore::Impl { | |||
| 161 | 137 | ||
| 162 | SharedPtr<ResourceLimit> system_resource_limit; | 138 | SharedPtr<ResourceLimit> system_resource_limit; |
| 163 | 139 | ||
| 164 | /// The event type of the generic timer callback event | ||
| 165 | CoreTiming::EventType* timer_callback_event_type = nullptr; | ||
| 166 | // TODO(yuriks): This can be removed if Timer objects are explicitly pooled in the future, | ||
| 167 | // allowing us to simply use a pool index or similar. | ||
| 168 | Kernel::HandleTable timer_callback_handle_table; | ||
| 169 | |||
| 170 | CoreTiming::EventType* thread_wakeup_event_type = nullptr; | 140 | CoreTiming::EventType* thread_wakeup_event_type = nullptr; |
| 171 | // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future, | 141 | // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future, |
| 172 | // allowing us to simply use a pool index or similar. | 142 | // allowing us to simply use a pool index or similar. |
| @@ -198,10 +168,6 @@ SharedPtr<Thread> KernelCore::RetrieveThreadFromWakeupCallbackHandleTable(Handle | |||
| 198 | return impl->thread_wakeup_callback_handle_table.Get<Thread>(handle); | 168 | return impl->thread_wakeup_callback_handle_table.Get<Thread>(handle); |
| 199 | } | 169 | } |
| 200 | 170 | ||
| 201 | SharedPtr<Timer> KernelCore::RetrieveTimerFromCallbackHandleTable(Handle handle) const { | ||
| 202 | return impl->timer_callback_handle_table.Get<Timer>(handle); | ||
| 203 | } | ||
| 204 | |||
| 205 | void KernelCore::AppendNewProcess(SharedPtr<Process> process) { | 171 | void KernelCore::AppendNewProcess(SharedPtr<Process> process) { |
| 206 | impl->process_list.push_back(std::move(process)); | 172 | impl->process_list.push_back(std::move(process)); |
| 207 | } | 173 | } |
| @@ -247,18 +213,10 @@ u64 KernelCore::CreateNewProcessID() { | |||
| 247 | return impl->next_process_id++; | 213 | return impl->next_process_id++; |
| 248 | } | 214 | } |
| 249 | 215 | ||
| 250 | ResultVal<Handle> KernelCore::CreateTimerCallbackHandle(const SharedPtr<Timer>& timer) { | ||
| 251 | return impl->timer_callback_handle_table.Create(timer); | ||
| 252 | } | ||
| 253 | |||
| 254 | CoreTiming::EventType* KernelCore::ThreadWakeupCallbackEventType() const { | 216 | CoreTiming::EventType* KernelCore::ThreadWakeupCallbackEventType() const { |
| 255 | return impl->thread_wakeup_event_type; | 217 | return impl->thread_wakeup_event_type; |
| 256 | } | 218 | } |
| 257 | 219 | ||
| 258 | CoreTiming::EventType* KernelCore::TimerCallbackEventType() const { | ||
| 259 | return impl->timer_callback_event_type; | ||
| 260 | } | ||
| 261 | |||
| 262 | Kernel::HandleTable& KernelCore::ThreadWakeupCallbackHandleTable() { | 220 | Kernel::HandleTable& KernelCore::ThreadWakeupCallbackHandleTable() { |
| 263 | return impl->thread_wakeup_callback_handle_table; | 221 | return impl->thread_wakeup_callback_handle_table; |
| 264 | } | 222 | } |
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 58c9d108b..c643a6401 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h | |||
| @@ -22,7 +22,6 @@ class HandleTable; | |||
| 22 | class Process; | 22 | class Process; |
| 23 | class ResourceLimit; | 23 | class ResourceLimit; |
| 24 | class Thread; | 24 | class Thread; |
| 25 | class Timer; | ||
| 26 | 25 | ||
| 27 | /// Represents a single instance of the kernel. | 26 | /// Represents a single instance of the kernel. |
| 28 | class KernelCore { | 27 | class KernelCore { |
| @@ -51,9 +50,6 @@ public: | |||
| 51 | /// Retrieves a shared pointer to a Thread instance within the thread wakeup handle table. | 50 | /// Retrieves a shared pointer to a Thread instance within the thread wakeup handle table. |
| 52 | SharedPtr<Thread> RetrieveThreadFromWakeupCallbackHandleTable(Handle handle) const; | 51 | SharedPtr<Thread> RetrieveThreadFromWakeupCallbackHandleTable(Handle handle) const; |
| 53 | 52 | ||
| 54 | /// Retrieves a shared pointer to a Timer instance within the timer callback handle table. | ||
| 55 | SharedPtr<Timer> RetrieveTimerFromCallbackHandleTable(Handle handle) const; | ||
| 56 | |||
| 57 | /// Adds the given shared pointer to an internal list of active processes. | 53 | /// Adds the given shared pointer to an internal list of active processes. |
| 58 | void AppendNewProcess(SharedPtr<Process> process); | 54 | void AppendNewProcess(SharedPtr<Process> process); |
| 59 | 55 | ||
| @@ -82,7 +78,6 @@ private: | |||
| 82 | friend class Object; | 78 | friend class Object; |
| 83 | friend class Process; | 79 | friend class Process; |
| 84 | friend class Thread; | 80 | friend class Thread; |
| 85 | friend class Timer; | ||
| 86 | 81 | ||
| 87 | /// Creates a new object ID, incrementing the internal object ID counter. | 82 | /// Creates a new object ID, incrementing the internal object ID counter. |
| 88 | u32 CreateNewObjectID(); | 83 | u32 CreateNewObjectID(); |
| @@ -93,15 +88,9 @@ private: | |||
| 93 | /// Creates a new thread ID, incrementing the internal thread ID counter. | 88 | /// Creates a new thread ID, incrementing the internal thread ID counter. |
| 94 | u64 CreateNewThreadID(); | 89 | u64 CreateNewThreadID(); |
| 95 | 90 | ||
| 96 | /// Creates a timer callback handle for the given timer. | ||
| 97 | ResultVal<Handle> CreateTimerCallbackHandle(const SharedPtr<Timer>& timer); | ||
| 98 | |||
| 99 | /// Retrieves the event type used for thread wakeup callbacks. | 91 | /// Retrieves the event type used for thread wakeup callbacks. |
| 100 | CoreTiming::EventType* ThreadWakeupCallbackEventType() const; | 92 | CoreTiming::EventType* ThreadWakeupCallbackEventType() const; |
| 101 | 93 | ||
| 102 | /// Retrieves the event type used for timer callbacks. | ||
| 103 | CoreTiming::EventType* TimerCallbackEventType() const; | ||
| 104 | |||
| 105 | /// Provides a reference to the thread wakeup callback handle table. | 94 | /// Provides a reference to the thread wakeup callback handle table. |
| 106 | Kernel::HandleTable& ThreadWakeupCallbackHandleTable(); | 95 | Kernel::HandleTable& ThreadWakeupCallbackHandleTable(); |
| 107 | 96 | ||
diff --git a/src/core/hle/kernel/object.cpp b/src/core/hle/kernel/object.cpp index 806078638..8870463d0 100644 --- a/src/core/hle/kernel/object.cpp +++ b/src/core/hle/kernel/object.cpp | |||
| @@ -16,7 +16,6 @@ bool Object::IsWaitable() const { | |||
| 16 | case HandleType::ReadableEvent: | 16 | case HandleType::ReadableEvent: |
| 17 | case HandleType::Thread: | 17 | case HandleType::Thread: |
| 18 | case HandleType::Process: | 18 | case HandleType::Process: |
| 19 | case HandleType::Timer: | ||
| 20 | case HandleType::ServerPort: | 19 | case HandleType::ServerPort: |
| 21 | case HandleType::ServerSession: | 20 | case HandleType::ServerSession: |
| 22 | return true; | 21 | return true; |
diff --git a/src/core/hle/kernel/object.h b/src/core/hle/kernel/object.h index 1541b6e3c..4c2505908 100644 --- a/src/core/hle/kernel/object.h +++ b/src/core/hle/kernel/object.h | |||
| @@ -25,7 +25,6 @@ enum class HandleType : u32 { | |||
| 25 | Thread, | 25 | Thread, |
| 26 | Process, | 26 | Process, |
| 27 | AddressArbiter, | 27 | AddressArbiter, |
| 28 | Timer, | ||
| 29 | ResourceLimit, | 28 | ResourceLimit, |
| 30 | ClientPort, | 29 | ClientPort, |
| 31 | ServerPort, | 30 | ServerPort, |
diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp index 6973e580c..0e5083f70 100644 --- a/src/core/hle/kernel/readable_event.cpp +++ b/src/core/hle/kernel/readable_event.cpp | |||
| @@ -44,8 +44,4 @@ ResultCode ReadableEvent::Reset() { | |||
| 44 | return RESULT_SUCCESS; | 44 | return RESULT_SUCCESS; |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | void ReadableEvent::WakeupAllWaitingThreads() { | ||
| 48 | WaitObject::WakeupAllWaitingThreads(); | ||
| 49 | } | ||
| 50 | |||
| 51 | } // namespace Kernel | 47 | } // namespace Kernel |
diff --git a/src/core/hle/kernel/readable_event.h b/src/core/hle/kernel/readable_event.h index 80b3b0aba..77a9c362c 100644 --- a/src/core/hle/kernel/readable_event.h +++ b/src/core/hle/kernel/readable_event.h | |||
| @@ -39,8 +39,6 @@ public: | |||
| 39 | bool ShouldWait(Thread* thread) const override; | 39 | bool ShouldWait(Thread* thread) const override; |
| 40 | void Acquire(Thread* thread) override; | 40 | void Acquire(Thread* thread) override; |
| 41 | 41 | ||
| 42 | void WakeupAllWaitingThreads() override; | ||
| 43 | |||
| 44 | /// Unconditionally clears the readable event's state. | 42 | /// Unconditionally clears the readable event's state. |
| 45 | void Clear(); | 43 | void Clear(); |
| 46 | 44 | ||
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 6588bd3b8..7cfecb68c 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp | |||
| @@ -597,6 +597,7 @@ enum class BreakType : u32 { | |||
| 597 | PostNROLoad = 4, | 597 | PostNROLoad = 4, |
| 598 | PreNROUnload = 5, | 598 | PreNROUnload = 5, |
| 599 | PostNROUnload = 6, | 599 | PostNROUnload = 6, |
| 600 | CppException = 7, | ||
| 600 | }; | 601 | }; |
| 601 | 602 | ||
| 602 | struct BreakReason { | 603 | struct BreakReason { |
| @@ -669,6 +670,9 @@ static void Break(u32 reason, u64 info1, u64 info2) { | |||
| 669 | "Signalling debugger, Unloaded an NRO at 0x{:016X} with size 0x{:016X}", info1, | 670 | "Signalling debugger, Unloaded an NRO at 0x{:016X} with size 0x{:016X}", info1, |
| 670 | info2); | 671 | info2); |
| 671 | break; | 672 | break; |
| 673 | case BreakType::CppException: | ||
| 674 | LOG_CRITICAL(Debug_Emulated, "Signalling debugger. Uncaught C++ exception encountered."); | ||
| 675 | break; | ||
| 672 | default: | 676 | default: |
| 673 | LOG_WARNING( | 677 | LOG_WARNING( |
| 674 | Debug_Emulated, | 678 | Debug_Emulated, |
diff --git a/src/core/hle/kernel/timer.cpp b/src/core/hle/kernel/timer.cpp deleted file mode 100644 index 2c4f50e2b..000000000 --- a/src/core/hle/kernel/timer.cpp +++ /dev/null | |||
| @@ -1,88 +0,0 @@ | |||
| 1 | // Copyright 2015 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/logging/log.h" | ||
| 7 | #include "core/core.h" | ||
| 8 | #include "core/core_timing.h" | ||
| 9 | #include "core/core_timing_util.h" | ||
| 10 | #include "core/hle/kernel/handle_table.h" | ||
| 11 | #include "core/hle/kernel/kernel.h" | ||
| 12 | #include "core/hle/kernel/object.h" | ||
| 13 | #include "core/hle/kernel/thread.h" | ||
| 14 | #include "core/hle/kernel/timer.h" | ||
| 15 | |||
| 16 | namespace Kernel { | ||
| 17 | |||
| 18 | Timer::Timer(KernelCore& kernel) : WaitObject{kernel} {} | ||
| 19 | Timer::~Timer() = default; | ||
| 20 | |||
| 21 | SharedPtr<Timer> Timer::Create(KernelCore& kernel, ResetType reset_type, std::string name) { | ||
| 22 | SharedPtr<Timer> timer(new Timer(kernel)); | ||
| 23 | |||
| 24 | timer->reset_type = reset_type; | ||
| 25 | timer->signaled = false; | ||
| 26 | timer->name = std::move(name); | ||
| 27 | timer->initial_delay = 0; | ||
| 28 | timer->interval_delay = 0; | ||
| 29 | timer->callback_handle = kernel.CreateTimerCallbackHandle(timer).Unwrap(); | ||
| 30 | |||
| 31 | return timer; | ||
| 32 | } | ||
| 33 | |||
| 34 | bool Timer::ShouldWait(Thread* thread) const { | ||
| 35 | return !signaled; | ||
| 36 | } | ||
| 37 | |||
| 38 | void Timer::Acquire(Thread* thread) { | ||
| 39 | ASSERT_MSG(!ShouldWait(thread), "object unavailable!"); | ||
| 40 | |||
| 41 | if (reset_type == ResetType::OneShot) | ||
| 42 | signaled = false; | ||
| 43 | } | ||
| 44 | |||
| 45 | void Timer::Set(s64 initial, s64 interval) { | ||
| 46 | // Ensure we get rid of any previous scheduled event | ||
| 47 | Cancel(); | ||
| 48 | |||
| 49 | initial_delay = initial; | ||
| 50 | interval_delay = interval; | ||
| 51 | |||
| 52 | if (initial == 0) { | ||
| 53 | // Immediately invoke the callback | ||
| 54 | Signal(0); | ||
| 55 | } else { | ||
| 56 | CoreTiming::ScheduleEvent(CoreTiming::nsToCycles(initial), kernel.TimerCallbackEventType(), | ||
| 57 | callback_handle); | ||
| 58 | } | ||
| 59 | } | ||
| 60 | |||
| 61 | void Timer::Cancel() { | ||
| 62 | CoreTiming::UnscheduleEvent(kernel.TimerCallbackEventType(), callback_handle); | ||
| 63 | } | ||
| 64 | |||
| 65 | void Timer::Clear() { | ||
| 66 | signaled = false; | ||
| 67 | } | ||
| 68 | |||
| 69 | void Timer::WakeupAllWaitingThreads() { | ||
| 70 | WaitObject::WakeupAllWaitingThreads(); | ||
| 71 | } | ||
| 72 | |||
| 73 | void Timer::Signal(int cycles_late) { | ||
| 74 | LOG_TRACE(Kernel, "Timer {} fired", GetObjectId()); | ||
| 75 | |||
| 76 | signaled = true; | ||
| 77 | |||
| 78 | // Resume all waiting threads | ||
| 79 | WakeupAllWaitingThreads(); | ||
| 80 | |||
| 81 | if (interval_delay != 0) { | ||
| 82 | // Reschedule the timer with the interval delay | ||
| 83 | CoreTiming::ScheduleEvent(CoreTiming::nsToCycles(interval_delay) - cycles_late, | ||
| 84 | kernel.TimerCallbackEventType(), callback_handle); | ||
| 85 | } | ||
| 86 | } | ||
| 87 | |||
| 88 | } // namespace Kernel | ||
diff --git a/src/core/hle/kernel/timer.h b/src/core/hle/kernel/timer.h deleted file mode 100644 index 12915c1b1..000000000 --- a/src/core/hle/kernel/timer.h +++ /dev/null | |||
| @@ -1,90 +0,0 @@ | |||
| 1 | // Copyright 2015 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "core/hle/kernel/object.h" | ||
| 9 | #include "core/hle/kernel/wait_object.h" | ||
| 10 | |||
| 11 | namespace Kernel { | ||
| 12 | |||
| 13 | class KernelCore; | ||
| 14 | |||
| 15 | class Timer final : public WaitObject { | ||
| 16 | public: | ||
| 17 | /** | ||
| 18 | * Creates a timer | ||
| 19 | * @param kernel The kernel instance to create the timer callback handle for. | ||
| 20 | * @param reset_type ResetType describing how to create the timer | ||
| 21 | * @param name Optional name of timer | ||
| 22 | * @return The created Timer | ||
| 23 | */ | ||
| 24 | static SharedPtr<Timer> Create(KernelCore& kernel, ResetType reset_type, | ||
| 25 | std::string name = "Unknown"); | ||
| 26 | |||
| 27 | std::string GetTypeName() const override { | ||
| 28 | return "Timer"; | ||
| 29 | } | ||
| 30 | std::string GetName() const override { | ||
| 31 | return name; | ||
| 32 | } | ||
| 33 | |||
| 34 | static const HandleType HANDLE_TYPE = HandleType::Timer; | ||
| 35 | HandleType GetHandleType() const override { | ||
| 36 | return HANDLE_TYPE; | ||
| 37 | } | ||
| 38 | |||
| 39 | ResetType GetResetType() const { | ||
| 40 | return reset_type; | ||
| 41 | } | ||
| 42 | |||
| 43 | u64 GetInitialDelay() const { | ||
| 44 | return initial_delay; | ||
| 45 | } | ||
| 46 | |||
| 47 | u64 GetIntervalDelay() const { | ||
| 48 | return interval_delay; | ||
| 49 | } | ||
| 50 | |||
| 51 | bool ShouldWait(Thread* thread) const override; | ||
| 52 | void Acquire(Thread* thread) override; | ||
| 53 | |||
| 54 | void WakeupAllWaitingThreads() override; | ||
| 55 | |||
| 56 | /** | ||
| 57 | * Starts the timer, with the specified initial delay and interval. | ||
| 58 | * @param initial Delay until the timer is first fired | ||
| 59 | * @param interval Delay until the timer is fired after the first time | ||
| 60 | */ | ||
| 61 | void Set(s64 initial, s64 interval); | ||
| 62 | |||
| 63 | void Cancel(); | ||
| 64 | void Clear(); | ||
| 65 | |||
| 66 | /** | ||
| 67 | * Signals the timer, waking up any waiting threads and rescheduling it | ||
| 68 | * for the next interval. | ||
| 69 | * This method should not be called from outside the timer callback handler, | ||
| 70 | * lest multiple callback events get scheduled. | ||
| 71 | */ | ||
| 72 | void Signal(int cycles_late); | ||
| 73 | |||
| 74 | private: | ||
| 75 | explicit Timer(KernelCore& kernel); | ||
| 76 | ~Timer() override; | ||
| 77 | |||
| 78 | ResetType reset_type; ///< The ResetType of this timer | ||
| 79 | |||
| 80 | u64 initial_delay; ///< The delay until the timer fires for the first time | ||
| 81 | u64 interval_delay; ///< The delay until the timer fires after the first time | ||
| 82 | |||
| 83 | bool signaled; ///< Whether the timer has been signaled or not | ||
| 84 | std::string name; ///< Name of timer (optional) | ||
| 85 | |||
| 86 | /// Handle used as userdata to reference this object when inserting into the CoreTiming queue. | ||
| 87 | Handle callback_handle; | ||
| 88 | }; | ||
| 89 | |||
| 90 | } // namespace Kernel | ||
diff --git a/src/core/hle/kernel/wait_object.h b/src/core/hle/kernel/wait_object.h index d70b67893..5987fb971 100644 --- a/src/core/hle/kernel/wait_object.h +++ b/src/core/hle/kernel/wait_object.h | |||
| @@ -33,19 +33,19 @@ public: | |||
| 33 | * Add a thread to wait on this object | 33 | * Add a thread to wait on this object |
| 34 | * @param thread Pointer to thread to add | 34 | * @param thread Pointer to thread to add |
| 35 | */ | 35 | */ |
| 36 | virtual void AddWaitingThread(SharedPtr<Thread> thread); | 36 | void AddWaitingThread(SharedPtr<Thread> thread); |
| 37 | 37 | ||
| 38 | /** | 38 | /** |
| 39 | * Removes a thread from waiting on this object (e.g. if it was resumed already) | 39 | * Removes a thread from waiting on this object (e.g. if it was resumed already) |
| 40 | * @param thread Pointer to thread to remove | 40 | * @param thread Pointer to thread to remove |
| 41 | */ | 41 | */ |
| 42 | virtual void RemoveWaitingThread(Thread* thread); | 42 | void RemoveWaitingThread(Thread* thread); |
| 43 | 43 | ||
| 44 | /** | 44 | /** |
| 45 | * Wake up all threads waiting on this object that can be awoken, in priority order, | 45 | * Wake up all threads waiting on this object that can be awoken, in priority order, |
| 46 | * and set the synchronization result and output of the thread. | 46 | * and set the synchronization result and output of the thread. |
| 47 | */ | 47 | */ |
| 48 | virtual void WakeupAllWaitingThreads(); | 48 | void WakeupAllWaitingThreads(); |
| 49 | 49 | ||
| 50 | /** | 50 | /** |
| 51 | * Wakes up a single thread waiting on this object. | 51 | * Wakes up a single thread waiting on this object. |
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index d1cbe0e44..3f009d2b7 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp | |||
| @@ -322,14 +322,15 @@ void ISelfController::SetScreenShotImageOrientation(Kernel::HLERequestContext& c | |||
| 322 | 322 | ||
| 323 | void ISelfController::CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx) { | 323 | void ISelfController::CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx) { |
| 324 | LOG_WARNING(Service_AM, "(STUBBED) called"); | 324 | LOG_WARNING(Service_AM, "(STUBBED) called"); |
| 325 | |||
| 325 | // TODO(Subv): Find out how AM determines the display to use, for now just | 326 | // TODO(Subv): Find out how AM determines the display to use, for now just |
| 326 | // create the layer in the Default display. | 327 | // create the layer in the Default display. |
| 327 | u64 display_id = nvflinger->OpenDisplay("Default"); | 328 | const auto display_id = nvflinger->OpenDisplay("Default"); |
| 328 | u64 layer_id = nvflinger->CreateLayer(display_id); | 329 | const auto layer_id = nvflinger->CreateLayer(*display_id); |
| 329 | 330 | ||
| 330 | IPC::ResponseBuilder rb{ctx, 4}; | 331 | IPC::ResponseBuilder rb{ctx, 4}; |
| 331 | rb.Push(RESULT_SUCCESS); | 332 | rb.Push(RESULT_SUCCESS); |
| 332 | rb.Push(layer_id); | 333 | rb.Push(*layer_id); |
| 333 | } | 334 | } |
| 334 | 335 | ||
| 335 | void ISelfController::SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx) { | 336 | void ISelfController::SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx) { |
diff --git a/src/core/hle/service/am/applet_ae.cpp b/src/core/hle/service/am/applet_ae.cpp index 41a573a91..b888f861d 100644 --- a/src/core/hle/service/am/applet_ae.cpp +++ b/src/core/hle/service/am/applet_ae.cpp | |||
| @@ -249,7 +249,8 @@ AppletAE::AppletAE(std::shared_ptr<NVFlinger::NVFlinger> nvflinger, | |||
| 249 | {300, nullptr, "OpenOverlayAppletProxy"}, | 249 | {300, nullptr, "OpenOverlayAppletProxy"}, |
| 250 | {350, nullptr, "OpenSystemApplicationProxy"}, | 250 | {350, nullptr, "OpenSystemApplicationProxy"}, |
| 251 | {400, nullptr, "CreateSelfLibraryAppletCreatorForDevelop"}, | 251 | {400, nullptr, "CreateSelfLibraryAppletCreatorForDevelop"}, |
| 252 | {401, nullptr, "GetSystemAppletControllerForDebug"}, | 252 | {410, nullptr, "GetSystemAppletControllerForDebug"}, |
| 253 | {1000, nullptr, "GetDebugFunctions"}, | ||
| 253 | }; | 254 | }; |
| 254 | // clang-format on | 255 | // clang-format on |
| 255 | 256 | ||
diff --git a/src/core/hle/service/audio/audin_u.cpp b/src/core/hle/service/audio/audin_u.cpp index 657010312..088410564 100644 --- a/src/core/hle/service/audio/audin_u.cpp +++ b/src/core/hle/service/audio/audin_u.cpp | |||
| @@ -12,6 +12,7 @@ namespace Service::Audio { | |||
| 12 | class IAudioIn final : public ServiceFramework<IAudioIn> { | 12 | class IAudioIn final : public ServiceFramework<IAudioIn> { |
| 13 | public: | 13 | public: |
| 14 | IAudioIn() : ServiceFramework("IAudioIn") { | 14 | IAudioIn() : ServiceFramework("IAudioIn") { |
| 15 | // clang-format off | ||
| 15 | static const FunctionInfo functions[] = { | 16 | static const FunctionInfo functions[] = { |
| 16 | {0, nullptr, "GetAudioInState"}, | 17 | {0, nullptr, "GetAudioInState"}, |
| 17 | {1, nullptr, "StartAudioIn"}, | 18 | {1, nullptr, "StartAudioIn"}, |
| @@ -28,16 +29,24 @@ public: | |||
| 28 | {12, nullptr, "SetAudioInDeviceGain"}, | 29 | {12, nullptr, "SetAudioInDeviceGain"}, |
| 29 | {13, nullptr, "GetAudioInDeviceGain"}, | 30 | {13, nullptr, "GetAudioInDeviceGain"}, |
| 30 | }; | 31 | }; |
| 32 | // clang-format on | ||
| 33 | |||
| 31 | RegisterHandlers(functions); | 34 | RegisterHandlers(functions); |
| 32 | } | 35 | } |
| 33 | ~IAudioIn() = default; | 36 | ~IAudioIn() = default; |
| 34 | }; | 37 | }; |
| 35 | 38 | ||
| 36 | AudInU::AudInU() : ServiceFramework("audin:u") { | 39 | AudInU::AudInU() : ServiceFramework("audin:u") { |
| 40 | // clang-format off | ||
| 37 | static const FunctionInfo functions[] = { | 41 | static const FunctionInfo functions[] = { |
| 38 | {0, nullptr, "ListAudioIns"}, {1, nullptr, "OpenAudioIn"}, {2, nullptr, "Unknown"}, | 42 | {0, nullptr, "ListAudioIns"}, |
| 39 | {3, nullptr, "OpenAudioInAuto"}, {4, nullptr, "ListAudioInsAuto"}, | 43 | {1, nullptr, "OpenAudioIn"}, |
| 44 | {2, nullptr, "Unknown"}, | ||
| 45 | {3, nullptr, "OpenAudioInAuto"}, | ||
| 46 | {4, nullptr, "ListAudioInsAuto"}, | ||
| 40 | }; | 47 | }; |
| 48 | // clang-format on | ||
| 49 | |||
| 41 | RegisterHandlers(functions); | 50 | RegisterHandlers(functions); |
| 42 | } | 51 | } |
| 43 | 52 | ||
diff --git a/src/core/hle/service/audio/audrec_u.cpp b/src/core/hle/service/audio/audrec_u.cpp index 34974afa9..6956a2e64 100644 --- a/src/core/hle/service/audio/audrec_u.cpp +++ b/src/core/hle/service/audio/audrec_u.cpp | |||
| @@ -12,6 +12,7 @@ namespace Service::Audio { | |||
| 12 | class IFinalOutputRecorder final : public ServiceFramework<IFinalOutputRecorder> { | 12 | class IFinalOutputRecorder final : public ServiceFramework<IFinalOutputRecorder> { |
| 13 | public: | 13 | public: |
| 14 | IFinalOutputRecorder() : ServiceFramework("IFinalOutputRecorder") { | 14 | IFinalOutputRecorder() : ServiceFramework("IFinalOutputRecorder") { |
| 15 | // clang-format off | ||
| 15 | static const FunctionInfo functions[] = { | 16 | static const FunctionInfo functions[] = { |
| 16 | {0, nullptr, "GetFinalOutputRecorderState"}, | 17 | {0, nullptr, "GetFinalOutputRecorderState"}, |
| 17 | {1, nullptr, "StartFinalOutputRecorder"}, | 18 | {1, nullptr, "StartFinalOutputRecorder"}, |
| @@ -20,10 +21,13 @@ public: | |||
| 20 | {4, nullptr, "RegisterBufferEvent"}, | 21 | {4, nullptr, "RegisterBufferEvent"}, |
| 21 | {5, nullptr, "GetReleasedFinalOutputRecorderBuffer"}, | 22 | {5, nullptr, "GetReleasedFinalOutputRecorderBuffer"}, |
| 22 | {6, nullptr, "ContainsFinalOutputRecorderBuffer"}, | 23 | {6, nullptr, "ContainsFinalOutputRecorderBuffer"}, |
| 23 | {7, nullptr, "Unknown"}, | 24 | {7, nullptr, "GetFinalOutputRecorderBufferEndTime"}, |
| 24 | {8, nullptr, "AppendFinalOutputRecorderBufferAuto"}, | 25 | {8, nullptr, "AppendFinalOutputRecorderBufferAuto"}, |
| 25 | {9, nullptr, "GetReleasedFinalOutputRecorderBufferAuto"}, | 26 | {9, nullptr, "GetReleasedFinalOutputRecorderBufferAuto"}, |
| 27 | {10, nullptr, "FlushFinalOutputRecorderBuffers"}, | ||
| 26 | }; | 28 | }; |
| 29 | // clang-format on | ||
| 30 | |||
| 27 | RegisterHandlers(functions); | 31 | RegisterHandlers(functions); |
| 28 | } | 32 | } |
| 29 | ~IFinalOutputRecorder() = default; | 33 | ~IFinalOutputRecorder() = default; |
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index 945259c7d..76cc48254 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp | |||
| @@ -229,14 +229,16 @@ private: | |||
| 229 | }; // namespace Audio | 229 | }; // namespace Audio |
| 230 | 230 | ||
| 231 | AudRenU::AudRenU() : ServiceFramework("audren:u") { | 231 | AudRenU::AudRenU() : ServiceFramework("audren:u") { |
| 232 | // clang-format off | ||
| 232 | static const FunctionInfo functions[] = { | 233 | static const FunctionInfo functions[] = { |
| 233 | {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"}, | 234 | {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"}, |
| 234 | {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"}, | 235 | {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"}, |
| 235 | {2, &AudRenU::GetAudioDevice, "GetAudioDevice"}, | 236 | {2, &AudRenU::GetAudioDeviceService, "GetAudioDeviceService"}, |
| 236 | {3, nullptr, "OpenAudioRendererAuto"}, | 237 | {3, nullptr, "OpenAudioRendererAuto"}, |
| 237 | {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, | 238 | {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, "GetAudioDeviceServiceWithRevisionInfo"}, |
| 238 | "GetAudioDeviceServiceWithRevisionInfo"}, | ||
| 239 | }; | 239 | }; |
| 240 | // clang-format on | ||
| 241 | |||
| 240 | RegisterHandlers(functions); | 242 | RegisterHandlers(functions); |
| 241 | } | 243 | } |
| 242 | 244 | ||
| @@ -313,7 +315,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { | |||
| 313 | LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", output_sz); | 315 | LOG_DEBUG(Service_Audio, "buffer_size=0x{:X}", output_sz); |
| 314 | } | 316 | } |
| 315 | 317 | ||
| 316 | void AudRenU::GetAudioDevice(Kernel::HLERequestContext& ctx) { | 318 | void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) { |
| 317 | LOG_DEBUG(Service_Audio, "called"); | 319 | LOG_DEBUG(Service_Audio, "called"); |
| 318 | 320 | ||
| 319 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 321 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h index c6bc3a90a..3d63388fb 100644 --- a/src/core/hle/service/audio/audren_u.h +++ b/src/core/hle/service/audio/audren_u.h | |||
| @@ -20,7 +20,7 @@ public: | |||
| 20 | private: | 20 | private: |
| 21 | void OpenAudioRenderer(Kernel::HLERequestContext& ctx); | 21 | void OpenAudioRenderer(Kernel::HLERequestContext& ctx); |
| 22 | void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx); | 22 | void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx); |
| 23 | void GetAudioDevice(Kernel::HLERequestContext& ctx); | 23 | void GetAudioDeviceService(Kernel::HLERequestContext& ctx); |
| 24 | void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx); | 24 | void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx); |
| 25 | 25 | ||
| 26 | enum class AudioFeatures : u32 { | 26 | enum class AudioFeatures : u32 { |
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp index a850cadc8..11eba4a12 100644 --- a/src/core/hle/service/audio/hwopus.cpp +++ b/src/core/hle/service/audio/hwopus.cpp | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | #include <chrono> | 5 | #include <chrono> |
| 6 | #include <cstring> | 6 | #include <cstring> |
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <optional> | ||
| 9 | #include <vector> | 8 | #include <vector> |
| 10 | 9 | ||
| 11 | #include <opus.h> | 10 | #include <opus.h> |
| @@ -30,48 +29,66 @@ public: | |||
| 30 | u32 channel_count) | 29 | u32 channel_count) |
| 31 | : ServiceFramework("IHardwareOpusDecoderManager"), decoder(std::move(decoder)), | 30 | : ServiceFramework("IHardwareOpusDecoderManager"), decoder(std::move(decoder)), |
| 32 | sample_rate(sample_rate), channel_count(channel_count) { | 31 | sample_rate(sample_rate), channel_count(channel_count) { |
| 32 | // clang-format off | ||
| 33 | static const FunctionInfo functions[] = { | 33 | static const FunctionInfo functions[] = { |
| 34 | {0, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"}, | 34 | {0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"}, |
| 35 | {1, nullptr, "SetContext"}, | 35 | {1, nullptr, "SetContext"}, |
| 36 | {2, nullptr, "DecodeInterleavedForMultiStream"}, | 36 | {2, nullptr, "DecodeInterleavedForMultiStreamOld"}, |
| 37 | {3, nullptr, "SetContextForMultiStream"}, | 37 | {3, nullptr, "SetContextForMultiStream"}, |
| 38 | {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerformance, | 38 | {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"}, |
| 39 | "DecodeInterleavedWithPerformance"}, | 39 | {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"}, |
| 40 | {5, nullptr, "Unknown5"}, | 40 | {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"}, |
| 41 | {6, nullptr, "Unknown6"}, | 41 | {7, nullptr, "DecodeInterleavedForMultiStream"}, |
| 42 | {7, nullptr, "Unknown7"}, | ||
| 43 | }; | 42 | }; |
| 43 | // clang-format on | ||
| 44 | |||
| 44 | RegisterHandlers(functions); | 45 | RegisterHandlers(functions); |
| 45 | } | 46 | } |
| 46 | 47 | ||
| 47 | private: | 48 | private: |
| 48 | void DecodeInterleaved(Kernel::HLERequestContext& ctx) { | 49 | /// Describes extra behavior that may be asked of the decoding context. |
| 50 | enum class ExtraBehavior { | ||
| 51 | /// No extra behavior. | ||
| 52 | None, | ||
| 53 | |||
| 54 | /// Resets the decoder context back to a freshly initialized state. | ||
| 55 | ResetContext, | ||
| 56 | }; | ||
| 57 | |||
| 58 | void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) { | ||
| 49 | LOG_DEBUG(Audio, "called"); | 59 | LOG_DEBUG(Audio, "called"); |
| 50 | 60 | ||
| 51 | u32 consumed = 0; | 61 | DecodeInterleavedHelper(ctx, nullptr, ExtraBehavior::None); |
| 52 | u32 sample_count = 0; | ||
| 53 | std::vector<opus_int16> samples(ctx.GetWriteBufferSize() / sizeof(opus_int16)); | ||
| 54 | if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples)) { | ||
| 55 | LOG_ERROR(Audio, "Failed to decode opus data"); | ||
| 56 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 57 | // TODO(ogniK): Use correct error code | ||
| 58 | rb.Push(ResultCode(-1)); | ||
| 59 | return; | ||
| 60 | } | ||
| 61 | IPC::ResponseBuilder rb{ctx, 4}; | ||
| 62 | rb.Push(RESULT_SUCCESS); | ||
| 63 | rb.Push<u32>(consumed); | ||
| 64 | rb.Push<u32>(sample_count); | ||
| 65 | ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16)); | ||
| 66 | } | 62 | } |
| 67 | 63 | ||
| 68 | void DecodeInterleavedWithPerformance(Kernel::HLERequestContext& ctx) { | 64 | void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) { |
| 65 | LOG_DEBUG(Audio, "called"); | ||
| 66 | |||
| 67 | u64 performance = 0; | ||
| 68 | DecodeInterleavedHelper(ctx, &performance, ExtraBehavior::None); | ||
| 69 | } | ||
| 70 | |||
| 71 | void DecodeInterleaved(Kernel::HLERequestContext& ctx) { | ||
| 69 | LOG_DEBUG(Audio, "called"); | 72 | LOG_DEBUG(Audio, "called"); |
| 70 | 73 | ||
| 74 | IPC::RequestParser rp{ctx}; | ||
| 75 | const auto extra_behavior = | ||
| 76 | rp.Pop<bool>() ? ExtraBehavior::ResetContext : ExtraBehavior::None; | ||
| 77 | |||
| 78 | u64 performance = 0; | ||
| 79 | DecodeInterleavedHelper(ctx, &performance, extra_behavior); | ||
| 80 | } | ||
| 81 | |||
| 82 | void DecodeInterleavedHelper(Kernel::HLERequestContext& ctx, u64* performance, | ||
| 83 | ExtraBehavior extra_behavior) { | ||
| 71 | u32 consumed = 0; | 84 | u32 consumed = 0; |
| 72 | u32 sample_count = 0; | 85 | u32 sample_count = 0; |
| 73 | u64 performance = 0; | ||
| 74 | std::vector<opus_int16> samples(ctx.GetWriteBufferSize() / sizeof(opus_int16)); | 86 | std::vector<opus_int16> samples(ctx.GetWriteBufferSize() / sizeof(opus_int16)); |
| 87 | |||
| 88 | if (extra_behavior == ExtraBehavior::ResetContext) { | ||
| 89 | ResetDecoderContext(); | ||
| 90 | } | ||
| 91 | |||
| 75 | if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples, | 92 | if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples, |
| 76 | performance)) { | 93 | performance)) { |
| 77 | LOG_ERROR(Audio, "Failed to decode opus data"); | 94 | LOG_ERROR(Audio, "Failed to decode opus data"); |
| @@ -80,25 +97,28 @@ private: | |||
| 80 | rb.Push(ResultCode(-1)); | 97 | rb.Push(ResultCode(-1)); |
| 81 | return; | 98 | return; |
| 82 | } | 99 | } |
| 83 | IPC::ResponseBuilder rb{ctx, 6}; | 100 | |
| 101 | const u32 param_size = performance != nullptr ? 6 : 4; | ||
| 102 | IPC::ResponseBuilder rb{ctx, param_size}; | ||
| 84 | rb.Push(RESULT_SUCCESS); | 103 | rb.Push(RESULT_SUCCESS); |
| 85 | rb.Push<u32>(consumed); | 104 | rb.Push<u32>(consumed); |
| 86 | rb.Push<u32>(sample_count); | 105 | rb.Push<u32>(sample_count); |
| 87 | rb.Push<u64>(performance); | 106 | if (performance) { |
| 107 | rb.Push<u64>(*performance); | ||
| 108 | } | ||
| 88 | ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16)); | 109 | ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16)); |
| 89 | } | 110 | } |
| 90 | 111 | ||
| 91 | bool Decoder_DecodeInterleaved( | 112 | bool Decoder_DecodeInterleaved(u32& consumed, u32& sample_count, const std::vector<u8>& input, |
| 92 | u32& consumed, u32& sample_count, const std::vector<u8>& input, | 113 | std::vector<opus_int16>& output, u64* out_performance_time) { |
| 93 | std::vector<opus_int16>& output, | ||
| 94 | std::optional<std::reference_wrapper<u64>> performance_time = std::nullopt) { | ||
| 95 | const auto start_time = std::chrono::high_resolution_clock::now(); | 114 | const auto start_time = std::chrono::high_resolution_clock::now(); |
| 96 | std::size_t raw_output_sz = output.size() * sizeof(opus_int16); | 115 | const std::size_t raw_output_sz = output.size() * sizeof(opus_int16); |
| 97 | if (sizeof(OpusHeader) > input.size()) { | 116 | if (sizeof(OpusHeader) > input.size()) { |
| 98 | LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}", | 117 | LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}", |
| 99 | sizeof(OpusHeader), input.size()); | 118 | sizeof(OpusHeader), input.size()); |
| 100 | return false; | 119 | return false; |
| 101 | } | 120 | } |
| 121 | |||
| 102 | OpusHeader hdr{}; | 122 | OpusHeader hdr{}; |
| 103 | std::memcpy(&hdr, input.data(), sizeof(OpusHeader)); | 123 | std::memcpy(&hdr, input.data(), sizeof(OpusHeader)); |
| 104 | if (sizeof(OpusHeader) + static_cast<u32>(hdr.sz) > input.size()) { | 124 | if (sizeof(OpusHeader) + static_cast<u32>(hdr.sz) > input.size()) { |
| @@ -106,8 +126,9 @@ private: | |||
| 106 | sizeof(OpusHeader) + static_cast<u32>(hdr.sz), input.size()); | 126 | sizeof(OpusHeader) + static_cast<u32>(hdr.sz), input.size()); |
| 107 | return false; | 127 | return false; |
| 108 | } | 128 | } |
| 109 | auto frame = input.data() + sizeof(OpusHeader); | 129 | |
| 110 | auto decoded_sample_count = opus_packet_get_nb_samples( | 130 | const auto frame = input.data() + sizeof(OpusHeader); |
| 131 | const auto decoded_sample_count = opus_packet_get_nb_samples( | ||
| 111 | frame, static_cast<opus_int32>(input.size() - sizeof(OpusHeader)), | 132 | frame, static_cast<opus_int32>(input.size() - sizeof(OpusHeader)), |
| 112 | static_cast<opus_int32>(sample_rate)); | 133 | static_cast<opus_int32>(sample_rate)); |
| 113 | if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) { | 134 | if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) { |
| @@ -117,8 +138,9 @@ private: | |||
| 117 | decoded_sample_count * channel_count * sizeof(u16), raw_output_sz); | 138 | decoded_sample_count * channel_count * sizeof(u16), raw_output_sz); |
| 118 | return false; | 139 | return false; |
| 119 | } | 140 | } |
| 141 | |||
| 120 | const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count)); | 142 | const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count)); |
| 121 | auto out_sample_count = | 143 | const auto out_sample_count = |
| 122 | opus_decode(decoder.get(), frame, hdr.sz, output.data(), frame_size, 0); | 144 | opus_decode(decoder.get(), frame, hdr.sz, output.data(), frame_size, 0); |
| 123 | if (out_sample_count < 0) { | 145 | if (out_sample_count < 0) { |
| 124 | LOG_ERROR(Audio, | 146 | LOG_ERROR(Audio, |
| @@ -127,16 +149,24 @@ private: | |||
| 127 | out_sample_count, frame_size, static_cast<u32>(hdr.sz)); | 149 | out_sample_count, frame_size, static_cast<u32>(hdr.sz)); |
| 128 | return false; | 150 | return false; |
| 129 | } | 151 | } |
| 152 | |||
| 130 | const auto end_time = std::chrono::high_resolution_clock::now() - start_time; | 153 | const auto end_time = std::chrono::high_resolution_clock::now() - start_time; |
| 131 | sample_count = out_sample_count; | 154 | sample_count = out_sample_count; |
| 132 | consumed = static_cast<u32>(sizeof(OpusHeader) + hdr.sz); | 155 | consumed = static_cast<u32>(sizeof(OpusHeader) + hdr.sz); |
| 133 | if (performance_time.has_value()) { | 156 | if (out_performance_time != nullptr) { |
| 134 | performance_time->get() = | 157 | *out_performance_time = |
| 135 | std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count(); | 158 | std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count(); |
| 136 | } | 159 | } |
| 160 | |||
| 137 | return true; | 161 | return true; |
| 138 | } | 162 | } |
| 139 | 163 | ||
| 164 | void ResetDecoderContext() { | ||
| 165 | ASSERT(decoder != nullptr); | ||
| 166 | |||
| 167 | opus_decoder_ctl(decoder.get(), OPUS_RESET_STATE); | ||
| 168 | } | ||
| 169 | |||
| 140 | struct OpusHeader { | 170 | struct OpusHeader { |
| 141 | u32_be sz; // Needs to be BE for some odd reason | 171 | u32_be sz; // Needs to be BE for some odd reason |
| 142 | INSERT_PADDING_WORDS(1); | 172 | INSERT_PADDING_WORDS(1); |
| @@ -157,6 +187,7 @@ void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) { | |||
| 157 | IPC::RequestParser rp{ctx}; | 187 | IPC::RequestParser rp{ctx}; |
| 158 | const auto sample_rate = rp.Pop<u32>(); | 188 | const auto sample_rate = rp.Pop<u32>(); |
| 159 | const auto channel_count = rp.Pop<u32>(); | 189 | const auto channel_count = rp.Pop<u32>(); |
| 190 | |||
| 160 | LOG_DEBUG(Audio, "called with sample_rate={}, channel_count={}", sample_rate, channel_count); | 191 | LOG_DEBUG(Audio, "called with sample_rate={}, channel_count={}", sample_rate, channel_count); |
| 161 | 192 | ||
| 162 | ASSERT_MSG(sample_rate == 48000 || sample_rate == 24000 || sample_rate == 16000 || | 193 | ASSERT_MSG(sample_rate == 48000 || sample_rate == 24000 || sample_rate == 16000 || |
| @@ -174,9 +205,10 @@ void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) { | |||
| 174 | 205 | ||
| 175 | void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) { | 206 | void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) { |
| 176 | IPC::RequestParser rp{ctx}; | 207 | IPC::RequestParser rp{ctx}; |
| 177 | auto sample_rate = rp.Pop<u32>(); | 208 | const auto sample_rate = rp.Pop<u32>(); |
| 178 | auto channel_count = rp.Pop<u32>(); | 209 | const auto channel_count = rp.Pop<u32>(); |
| 179 | auto buffer_sz = rp.Pop<u32>(); | 210 | const auto buffer_sz = rp.Pop<u32>(); |
| 211 | |||
| 180 | LOG_DEBUG(Audio, "called sample_rate={}, channel_count={}, buffer_size={}", sample_rate, | 212 | LOG_DEBUG(Audio, "called sample_rate={}, channel_count={}, buffer_size={}", sample_rate, |
| 181 | channel_count, buffer_sz); | 213 | channel_count, buffer_sz); |
| 182 | 214 | ||
| @@ -185,8 +217,9 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) { | |||
| 185 | "Invalid sample rate"); | 217 | "Invalid sample rate"); |
| 186 | ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count"); | 218 | ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count"); |
| 187 | 219 | ||
| 188 | std::size_t worker_sz = WorkerBufferSize(channel_count); | 220 | const std::size_t worker_sz = WorkerBufferSize(channel_count); |
| 189 | ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large"); | 221 | ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large"); |
| 222 | |||
| 190 | std::unique_ptr<OpusDecoder, OpusDeleter> decoder{ | 223 | std::unique_ptr<OpusDecoder, OpusDeleter> decoder{ |
| 191 | static_cast<OpusDecoder*>(operator new(worker_sz))}; | 224 | static_cast<OpusDecoder*>(operator new(worker_sz))}; |
| 192 | if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) { | 225 | if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) { |
diff --git a/src/core/hle/service/btdrv/btdrv.cpp b/src/core/hle/service/btdrv/btdrv.cpp index 5704ca0ab..59ef603e1 100644 --- a/src/core/hle/service/btdrv/btdrv.cpp +++ b/src/core/hle/service/btdrv/btdrv.cpp | |||
| @@ -19,16 +19,16 @@ public: | |||
| 19 | explicit Bt() : ServiceFramework{"bt"} { | 19 | explicit Bt() : ServiceFramework{"bt"} { |
| 20 | // clang-format off | 20 | // clang-format off |
| 21 | static const FunctionInfo functions[] = { | 21 | static const FunctionInfo functions[] = { |
| 22 | {0, nullptr, "Unknown0"}, | 22 | {0, nullptr, "LeClientReadCharacteristic"}, |
| 23 | {1, nullptr, "Unknown1"}, | 23 | {1, nullptr, "LeClientReadDescriptor"}, |
| 24 | {2, nullptr, "Unknown2"}, | 24 | {2, nullptr, "LeClientWriteCharacteristic"}, |
| 25 | {3, nullptr, "Unknown3"}, | 25 | {3, nullptr, "LeClientWriteDescriptor"}, |
| 26 | {4, nullptr, "Unknown4"}, | 26 | {4, nullptr, "LeClientRegisterNotification"}, |
| 27 | {5, nullptr, "Unknown5"}, | 27 | {5, nullptr, "LeClientDeregisterNotification"}, |
| 28 | {6, nullptr, "Unknown6"}, | 28 | {6, nullptr, "SetLeResponse"}, |
| 29 | {7, nullptr, "Unknown7"}, | 29 | {7, nullptr, "LeSendIndication"}, |
| 30 | {8, nullptr, "Unknown8"}, | 30 | {8, nullptr, "GetLeEventInfo"}, |
| 31 | {9, &Bt::RegisterEvent, "RegisterEvent"}, | 31 | {9, &Bt::RegisterBleEvent, "RegisterBleEvent"}, |
| 32 | }; | 32 | }; |
| 33 | // clang-format on | 33 | // clang-format on |
| 34 | RegisterHandlers(functions); | 34 | RegisterHandlers(functions); |
| @@ -39,7 +39,7 @@ public: | |||
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | private: | 41 | private: |
| 42 | void RegisterEvent(Kernel::HLERequestContext& ctx) { | 42 | void RegisterBleEvent(Kernel::HLERequestContext& ctx) { |
| 43 | LOG_WARNING(Service_BTM, "(STUBBED) called"); | 43 | LOG_WARNING(Service_BTM, "(STUBBED) called"); |
| 44 | 44 | ||
| 45 | IPC::ResponseBuilder rb{ctx, 2, 1}; | 45 | IPC::ResponseBuilder rb{ctx, 2, 1}; |
| @@ -55,11 +55,11 @@ public: | |||
| 55 | explicit BtDrv() : ServiceFramework{"btdrv"} { | 55 | explicit BtDrv() : ServiceFramework{"btdrv"} { |
| 56 | // clang-format off | 56 | // clang-format off |
| 57 | static const FunctionInfo functions[] = { | 57 | static const FunctionInfo functions[] = { |
| 58 | {0, nullptr, "Unknown"}, | 58 | {0, nullptr, "InitializeBluetoothDriver"}, |
| 59 | {1, nullptr, "Init"}, | 59 | {1, nullptr, "InitializeBluetooth"}, |
| 60 | {2, nullptr, "Enable"}, | 60 | {2, nullptr, "EnableBluetooth"}, |
| 61 | {3, nullptr, "Disable"}, | 61 | {3, nullptr, "DisableBluetooth"}, |
| 62 | {4, nullptr, "CleanupAndShutdown"}, | 62 | {4, nullptr, "CleanupBluetooth"}, |
| 63 | {5, nullptr, "GetAdapterProperties"}, | 63 | {5, nullptr, "GetAdapterProperties"}, |
| 64 | {6, nullptr, "GetAdapterProperty"}, | 64 | {6, nullptr, "GetAdapterProperty"}, |
| 65 | {7, nullptr, "SetAdapterProperty"}, | 65 | {7, nullptr, "SetAdapterProperty"}, |
| @@ -70,36 +70,91 @@ public: | |||
| 70 | {12, nullptr, "CancelBond"}, | 70 | {12, nullptr, "CancelBond"}, |
| 71 | {13, nullptr, "PinReply"}, | 71 | {13, nullptr, "PinReply"}, |
| 72 | {14, nullptr, "SspReply"}, | 72 | {14, nullptr, "SspReply"}, |
| 73 | {15, nullptr, "Unknown2"}, | 73 | {15, nullptr, "GetEventInfo"}, |
| 74 | {16, nullptr, "InitInterfaces"}, | 74 | {16, nullptr, "InitializeHid"}, |
| 75 | {17, nullptr, "HidHostInterface_Connect"}, | 75 | {17, nullptr, "HidConnect"}, |
| 76 | {18, nullptr, "HidHostInterface_Disconnect"}, | 76 | {18, nullptr, "HidDisconnect"}, |
| 77 | {19, nullptr, "HidHostInterface_SendData"}, | 77 | {19, nullptr, "HidSendData"}, |
| 78 | {20, nullptr, "HidHostInterface_SendData2"}, | 78 | {20, nullptr, "HidSendData2"}, |
| 79 | {21, nullptr, "HidHostInterface_SetReport"}, | 79 | {21, nullptr, "HidSetReport"}, |
| 80 | {22, nullptr, "HidHostInterface_GetReport"}, | 80 | {22, nullptr, "HidGetReport"}, |
| 81 | {23, nullptr, "HidHostInterface_WakeController"}, | 81 | {23, nullptr, "HidWakeController"}, |
| 82 | {24, nullptr, "HidHostInterface_AddPairedDevice"}, | 82 | {24, nullptr, "HidAddPairedDevice"}, |
| 83 | {25, nullptr, "HidHostInterface_GetPairedDevice"}, | 83 | {25, nullptr, "HidGetPairedDevice"}, |
| 84 | {26, nullptr, "HidHostInterface_CleanupAndShutdown"}, | 84 | {26, nullptr, "CleanupHid"}, |
| 85 | {27, nullptr, "Unknown3"}, | 85 | {27, nullptr, "HidGetEventInfo"}, |
| 86 | {28, nullptr, "ExtInterface_SetTSI"}, | 86 | {28, nullptr, "ExtSetTsi"}, |
| 87 | {29, nullptr, "ExtInterface_SetBurstMode"}, | 87 | {29, nullptr, "ExtSetBurstMode"}, |
| 88 | {30, nullptr, "ExtInterface_SetZeroRetran"}, | 88 | {30, nullptr, "ExtSetZeroRetran"}, |
| 89 | {31, nullptr, "ExtInterface_SetMcMode"}, | 89 | {31, nullptr, "ExtSetMcMode"}, |
| 90 | {32, nullptr, "ExtInterface_StartLlrMode"}, | 90 | {32, nullptr, "ExtStartLlrMode"}, |
| 91 | {33, nullptr, "ExtInterface_ExitLlrMode"}, | 91 | {33, nullptr, "ExtExitLlrMode"}, |
| 92 | {34, nullptr, "ExtInterface_SetRadio"}, | 92 | {34, nullptr, "ExtSetRadio"}, |
| 93 | {35, nullptr, "ExtInterface_SetVisibility"}, | 93 | {35, nullptr, "ExtSetVisibility"}, |
| 94 | {36, nullptr, "Unknown4"}, | 94 | {36, nullptr, "ExtSetTbfcScan"}, |
| 95 | {37, nullptr, "Unknown5"}, | 95 | {37, nullptr, "RegisterHidReportEvent"}, |
| 96 | {38, nullptr, "HidHostInterface_GetLatestPlr"}, | 96 | {38, nullptr, "HidGetReportEventInfo"}, |
| 97 | {39, nullptr, "ExtInterface_GetPendingConnections"}, | 97 | {39, nullptr, "GetLatestPlr"}, |
| 98 | {40, nullptr, "HidHostInterface_GetChannelMap"}, | 98 | {40, nullptr, "ExtGetPendingConnections"}, |
| 99 | {41, nullptr, "SetIsBluetoothBoostEnabled"}, | 99 | {41, nullptr, "GetChannelMap"}, |
| 100 | {42, nullptr, "GetIsBluetoothBoostEnabled"}, | 100 | {42, nullptr, "EnableBluetoothBoostSetting"}, |
| 101 | {43, nullptr, "SetIsBluetoothAfhEnabled"}, | 101 | {43, nullptr, "IsBluetoothBoostSettingEnabled"}, |
| 102 | {44, nullptr, "GetIsBluetoothAfhEnabled"}, | 102 | {44, nullptr, "EnableBluetoothAfhSetting"}, |
| 103 | {45, nullptr, "IsBluetoothAfhSettingEnabled"}, | ||
| 104 | {46, nullptr, "InitializeBluetoothLe"}, | ||
| 105 | {47, nullptr, "EnableBluetoothLe"}, | ||
| 106 | {48, nullptr, "DisableBluetoothLe"}, | ||
| 107 | {49, nullptr, "CleanupBluetoothLe"}, | ||
| 108 | {50, nullptr, "SetLeVisibility"}, | ||
| 109 | {51, nullptr, "SetLeConnectionParameter"}, | ||
| 110 | {52, nullptr, "SetLeDefaultConnectionParameter"}, | ||
| 111 | {53, nullptr, "SetLeAdvertiseData"}, | ||
| 112 | {54, nullptr, "SetLeAdvertiseParameter"}, | ||
| 113 | {55, nullptr, "StartLeScan"}, | ||
| 114 | {56, nullptr, "StopLeScan"}, | ||
| 115 | {57, nullptr, "AddLeScanFilterCondition"}, | ||
| 116 | {58, nullptr, "DeleteLeScanFilterCondition"}, | ||
| 117 | {59, nullptr, "DeleteLeScanFilter"}, | ||
| 118 | {60, nullptr, "ClearLeScanFilters"}, | ||
| 119 | {61, nullptr, "EnableLeScanFilter"}, | ||
| 120 | {62, nullptr, "RegisterLeClient"}, | ||
| 121 | {63, nullptr, "UnregisterLeClient"}, | ||
| 122 | {64, nullptr, "UnregisterLeClientAll"}, | ||
| 123 | {65, nullptr, "LeClientConnect"}, | ||
| 124 | {66, nullptr, "LeClientCancelConnection"}, | ||
| 125 | {67, nullptr, "LeClientDisconnect"}, | ||
| 126 | {68, nullptr, "LeClientGetAttributes"}, | ||
| 127 | {69, nullptr, "LeClientDiscoverService"}, | ||
| 128 | {70, nullptr, "LeClientConfigureMtu"}, | ||
| 129 | {71, nullptr, "RegisterLeServer"}, | ||
| 130 | {72, nullptr, "UnregisterLeServer"}, | ||
| 131 | {73, nullptr, "LeServerConnect"}, | ||
| 132 | {74, nullptr, "LeServerDisconnect"}, | ||
| 133 | {75, nullptr, "CreateLeService"}, | ||
| 134 | {76, nullptr, "StartLeService"}, | ||
| 135 | {77, nullptr, "AddLeCharacteristic"}, | ||
| 136 | {78, nullptr, "AddLeDescriptor"}, | ||
| 137 | {79, nullptr, "GetLeCoreEventInfo"}, | ||
| 138 | {80, nullptr, "LeGetFirstCharacteristic"}, | ||
| 139 | {81, nullptr, "LeGetNextCharacteristic"}, | ||
| 140 | {82, nullptr, "LeGetFirstDescriptor"}, | ||
| 141 | {83, nullptr, "LeGetNextDescriptor"}, | ||
| 142 | {84, nullptr, "RegisterLeCoreDataPath"}, | ||
| 143 | {85, nullptr, "UnregisterLeCoreDataPath"}, | ||
| 144 | {86, nullptr, "RegisterLeHidDataPath"}, | ||
| 145 | {87, nullptr, "UnregisterLeHidDataPath"}, | ||
| 146 | {88, nullptr, "RegisterLeDataPath"}, | ||
| 147 | {89, nullptr, "UnregisterLeDataPath"}, | ||
| 148 | {90, nullptr, "LeClientReadCharacteristic"}, | ||
| 149 | {91, nullptr, "LeClientReadDescriptor"}, | ||
| 150 | {92, nullptr, "LeClientWriteCharacteristic"}, | ||
| 151 | {93, nullptr, "LeClientWriteDescriptor"}, | ||
| 152 | {94, nullptr, "LeClientRegisterNotification"}, | ||
| 153 | {95, nullptr, "LeClientDeregisterNotification"}, | ||
| 154 | {96, nullptr, "GetLeHidEventInfo"}, | ||
| 155 | {97, nullptr, "RegisterBleHidEvent"}, | ||
| 156 | {98, nullptr, "SetLeScanParameter"}, | ||
| 157 | {256, nullptr, "GetIsManufacturingMode"} | ||
| 103 | }; | 158 | }; |
| 104 | // clang-format on | 159 | // clang-format on |
| 105 | 160 | ||
diff --git a/src/core/hle/service/btm/btm.cpp b/src/core/hle/service/btm/btm.cpp index ef7398a23..4f15c3f19 100644 --- a/src/core/hle/service/btm/btm.cpp +++ b/src/core/hle/service/btm/btm.cpp | |||
| @@ -20,38 +20,38 @@ public: | |||
| 20 | explicit IBtmUserCore() : ServiceFramework{"IBtmUserCore"} { | 20 | explicit IBtmUserCore() : ServiceFramework{"IBtmUserCore"} { |
| 21 | // clang-format off | 21 | // clang-format off |
| 22 | static const FunctionInfo functions[] = { | 22 | static const FunctionInfo functions[] = { |
| 23 | {0, &IBtmUserCore::GetScanEvent, "GetScanEvent"}, | 23 | {0, &IBtmUserCore::AcquireBleScanEvent, "AcquireBleScanEvent"}, |
| 24 | {1, nullptr, "Unknown1"}, | 24 | {1, nullptr, "GetBleScanFilterParameter"}, |
| 25 | {2, nullptr, "Unknown2"}, | 25 | {2, nullptr, "GetBleScanFilterParameter2"}, |
| 26 | {3, nullptr, "Unknown3"}, | 26 | {3, nullptr, "StartBleScanForGeneral"}, |
| 27 | {4, nullptr, "Unknown4"}, | 27 | {4, nullptr, "StopBleScanForGeneral"}, |
| 28 | {5, nullptr, "Unknown5"}, | 28 | {5, nullptr, "GetBleScanResultsForGeneral"}, |
| 29 | {6, nullptr, "Unknown6"}, | 29 | {6, nullptr, "StartBleScanForPaired"}, |
| 30 | {7, nullptr, "Unknown7"}, | 30 | {7, nullptr, "StopBleScanForPaired"}, |
| 31 | {8, nullptr, "Unknown8"}, | 31 | {8, nullptr, "StartBleScanForSmartDevice"}, |
| 32 | {9, nullptr, "Unknown9"}, | 32 | {9, nullptr, "StopBleScanForSmartDevice"}, |
| 33 | {10, nullptr, "Unknown10"}, | 33 | {10, nullptr, "GetBleScanResultsForSmartDevice"}, |
| 34 | {17, &IBtmUserCore::GetConnectionEvent, "GetConnectionEvent"}, | 34 | {17, &IBtmUserCore::AcquireBleConnectionEvent, "AcquireBleConnectionEvent"}, |
| 35 | {18, nullptr, "Unknown18"}, | 35 | {18, nullptr, "BleConnect"}, |
| 36 | {19, nullptr, "Unknown19"}, | 36 | {19, nullptr, "BleDisconnect"}, |
| 37 | {20, nullptr, "Unknown20"}, | 37 | {20, nullptr, "BleGetConnectionState"}, |
| 38 | {21, nullptr, "Unknown21"}, | 38 | {21, nullptr, "AcquireBlePairingEvent"}, |
| 39 | {22, nullptr, "Unknown22"}, | 39 | {22, nullptr, "BlePairDevice"}, |
| 40 | {23, nullptr, "Unknown23"}, | 40 | {23, nullptr, "BleUnPairDevice"}, |
| 41 | {24, nullptr, "Unknown24"}, | 41 | {24, nullptr, "BleUnPairDevice2"}, |
| 42 | {25, nullptr, "Unknown25"}, | 42 | {25, nullptr, "BleGetPairedDevices"}, |
| 43 | {26, &IBtmUserCore::GetDiscoveryEvent, "AcquireBleServiceDiscoveryEventImpl"}, | 43 | {26, &IBtmUserCore::AcquireBleServiceDiscoveryEvent, "AcquireBleServiceDiscoveryEvent"}, |
| 44 | {27, nullptr, "Unknown27"}, | 44 | {27, nullptr, "GetGattServices"}, |
| 45 | {28, nullptr, "Unknown28"}, | 45 | {28, nullptr, "GetGattService"}, |
| 46 | {29, nullptr, "Unknown29"}, | 46 | {29, nullptr, "GetGattIncludedServices"}, |
| 47 | {30, nullptr, "Unknown30"}, | 47 | {30, nullptr, "GetBelongingGattService"}, |
| 48 | {31, nullptr, "Unknown31"}, | 48 | {31, nullptr, "GetGattCharacteristics"}, |
| 49 | {32, nullptr, "Unknown32"}, | 49 | {32, nullptr, "GetGattDescriptors"}, |
| 50 | {33, &IBtmUserCore::GetConfigEvent, "GetConfigEvent"}, | 50 | {33, &IBtmUserCore::AcquireBleMtuConfigEvent, "AcquireBleMtuConfigEvent"}, |
| 51 | {34, nullptr, "Unknown34"}, | 51 | {34, nullptr, "ConfigureBleMtu"}, |
| 52 | {35, nullptr, "Unknown35"}, | 52 | {35, nullptr, "GetBleMtu"}, |
| 53 | {36, nullptr, "Unknown36"}, | 53 | {36, nullptr, "RegisterBleGattDataPath"}, |
| 54 | {37, nullptr, "Unknown37"}, | 54 | {37, nullptr, "UnregisterBleGattDataPath"}, |
| 55 | }; | 55 | }; |
| 56 | // clang-format on | 56 | // clang-format on |
| 57 | RegisterHandlers(functions); | 57 | RegisterHandlers(functions); |
| @@ -68,7 +68,7 @@ public: | |||
| 68 | } | 68 | } |
| 69 | 69 | ||
| 70 | private: | 70 | private: |
| 71 | void GetScanEvent(Kernel::HLERequestContext& ctx) { | 71 | void AcquireBleScanEvent(Kernel::HLERequestContext& ctx) { |
| 72 | LOG_WARNING(Service_BTM, "(STUBBED) called"); | 72 | LOG_WARNING(Service_BTM, "(STUBBED) called"); |
| 73 | 73 | ||
| 74 | IPC::ResponseBuilder rb{ctx, 2, 1}; | 74 | IPC::ResponseBuilder rb{ctx, 2, 1}; |
| @@ -76,7 +76,7 @@ private: | |||
| 76 | rb.PushCopyObjects(scan_event.readable); | 76 | rb.PushCopyObjects(scan_event.readable); |
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | void GetConnectionEvent(Kernel::HLERequestContext& ctx) { | 79 | void AcquireBleConnectionEvent(Kernel::HLERequestContext& ctx) { |
| 80 | LOG_WARNING(Service_BTM, "(STUBBED) called"); | 80 | LOG_WARNING(Service_BTM, "(STUBBED) called"); |
| 81 | 81 | ||
| 82 | IPC::ResponseBuilder rb{ctx, 2, 1}; | 82 | IPC::ResponseBuilder rb{ctx, 2, 1}; |
| @@ -84,7 +84,7 @@ private: | |||
| 84 | rb.PushCopyObjects(connection_event.readable); | 84 | rb.PushCopyObjects(connection_event.readable); |
| 85 | } | 85 | } |
| 86 | 86 | ||
| 87 | void GetDiscoveryEvent(Kernel::HLERequestContext& ctx) { | 87 | void AcquireBleServiceDiscoveryEvent(Kernel::HLERequestContext& ctx) { |
| 88 | LOG_WARNING(Service_BTM, "(STUBBED) called"); | 88 | LOG_WARNING(Service_BTM, "(STUBBED) called"); |
| 89 | 89 | ||
| 90 | IPC::ResponseBuilder rb{ctx, 2, 1}; | 90 | IPC::ResponseBuilder rb{ctx, 2, 1}; |
| @@ -92,7 +92,7 @@ private: | |||
| 92 | rb.PushCopyObjects(service_discovery.readable); | 92 | rb.PushCopyObjects(service_discovery.readable); |
| 93 | } | 93 | } |
| 94 | 94 | ||
| 95 | void GetConfigEvent(Kernel::HLERequestContext& ctx) { | 95 | void AcquireBleMtuConfigEvent(Kernel::HLERequestContext& ctx) { |
| 96 | LOG_WARNING(Service_BTM, "(STUBBED) called"); | 96 | LOG_WARNING(Service_BTM, "(STUBBED) called"); |
| 97 | 97 | ||
| 98 | IPC::ResponseBuilder rb{ctx, 2, 1}; | 98 | IPC::ResponseBuilder rb{ctx, 2, 1}; |
| @@ -111,14 +111,14 @@ public: | |||
| 111 | explicit BTM_USR() : ServiceFramework{"btm:u"} { | 111 | explicit BTM_USR() : ServiceFramework{"btm:u"} { |
| 112 | // clang-format off | 112 | // clang-format off |
| 113 | static const FunctionInfo functions[] = { | 113 | static const FunctionInfo functions[] = { |
| 114 | {0, &BTM_USR::GetCoreImpl, "GetCoreImpl"}, | 114 | {0, &BTM_USR::GetCore, "GetCore"}, |
| 115 | }; | 115 | }; |
| 116 | // clang-format on | 116 | // clang-format on |
| 117 | RegisterHandlers(functions); | 117 | RegisterHandlers(functions); |
| 118 | } | 118 | } |
| 119 | 119 | ||
| 120 | private: | 120 | private: |
| 121 | void GetCoreImpl(Kernel::HLERequestContext& ctx) { | 121 | void GetCore(Kernel::HLERequestContext& ctx) { |
| 122 | LOG_DEBUG(Service_BTM, "called"); | 122 | LOG_DEBUG(Service_BTM, "called"); |
| 123 | 123 | ||
| 124 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 124 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| @@ -134,26 +134,64 @@ public: | |||
| 134 | static const FunctionInfo functions[] = { | 134 | static const FunctionInfo functions[] = { |
| 135 | {0, nullptr, "Unknown1"}, | 135 | {0, nullptr, "Unknown1"}, |
| 136 | {1, nullptr, "Unknown2"}, | 136 | {1, nullptr, "Unknown2"}, |
| 137 | {2, nullptr, "RegisterSystemEventForConnectedDeviceConditionImpl"}, | 137 | {2, nullptr, "RegisterSystemEventForConnectedDeviceCondition"}, |
| 138 | {3, nullptr, "Unknown3"}, | 138 | {3, nullptr, "Unknown3"}, |
| 139 | {4, nullptr, "Unknown4"}, | 139 | {4, nullptr, "Unknown4"}, |
| 140 | {5, nullptr, "Unknown5"}, | 140 | {5, nullptr, "Unknown5"}, |
| 141 | {6, nullptr, "Unknown6"}, | 141 | {6, nullptr, "Unknown6"}, |
| 142 | {7, nullptr, "Unknown7"}, | 142 | {7, nullptr, "Unknown7"}, |
| 143 | {8, nullptr, "RegisterSystemEventForRegisteredDeviceInfoImpl"}, | 143 | {8, nullptr, "RegisterSystemEventForRegisteredDeviceInfo"}, |
| 144 | {9, nullptr, "Unknown8"}, | 144 | {9, nullptr, "Unknown8"}, |
| 145 | {10, nullptr, "Unknown9"}, | 145 | {10, nullptr, "Unknown9"}, |
| 146 | {11, nullptr, "Unknown10"}, | 146 | {11, nullptr, "Unknown10"}, |
| 147 | {12, nullptr, "Unknown11"}, | 147 | {12, nullptr, "Unknown11"}, |
| 148 | {13, nullptr, "Unknown12"}, | 148 | {13, nullptr, "Unknown12"}, |
| 149 | {14, nullptr, "EnableRadioImpl"}, | 149 | {14, nullptr, "EnableRadio"}, |
| 150 | {15, nullptr, "DisableRadioImpl"}, | 150 | {15, nullptr, "DisableRadio"}, |
| 151 | {16, nullptr, "Unknown13"}, | 151 | {16, nullptr, "Unknown13"}, |
| 152 | {17, nullptr, "Unknown14"}, | 152 | {17, nullptr, "Unknown14"}, |
| 153 | {18, nullptr, "Unknown15"}, | 153 | {18, nullptr, "Unknown15"}, |
| 154 | {19, nullptr, "Unknown16"}, | 154 | {19, nullptr, "Unknown16"}, |
| 155 | {20, nullptr, "Unknown17"}, | 155 | {20, nullptr, "Unknown17"}, |
| 156 | {21, nullptr, "Unknown18"}, | 156 | {21, nullptr, "Unknown18"}, |
| 157 | {22, nullptr, "Unknown19"}, | ||
| 158 | {23, nullptr, "Unknown20"}, | ||
| 159 | {24, nullptr, "Unknown21"}, | ||
| 160 | {25, nullptr, "Unknown22"}, | ||
| 161 | {26, nullptr, "Unknown23"}, | ||
| 162 | {27, nullptr, "Unknown24"}, | ||
| 163 | {28, nullptr, "Unknown25"}, | ||
| 164 | {29, nullptr, "Unknown26"}, | ||
| 165 | {30, nullptr, "Unknown27"}, | ||
| 166 | {31, nullptr, "Unknown28"}, | ||
| 167 | {32, nullptr, "Unknown29"}, | ||
| 168 | {33, nullptr, "Unknown30"}, | ||
| 169 | {34, nullptr, "Unknown31"}, | ||
| 170 | {35, nullptr, "Unknown32"}, | ||
| 171 | {36, nullptr, "Unknown33"}, | ||
| 172 | {37, nullptr, "Unknown34"}, | ||
| 173 | {38, nullptr, "Unknown35"}, | ||
| 174 | {39, nullptr, "Unknown36"}, | ||
| 175 | {40, nullptr, "Unknown37"}, | ||
| 176 | {41, nullptr, "Unknown38"}, | ||
| 177 | {42, nullptr, "Unknown39"}, | ||
| 178 | {43, nullptr, "Unknown40"}, | ||
| 179 | {44, nullptr, "Unknown41"}, | ||
| 180 | {45, nullptr, "Unknown42"}, | ||
| 181 | {46, nullptr, "Unknown43"}, | ||
| 182 | {47, nullptr, "Unknown44"}, | ||
| 183 | {48, nullptr, "Unknown45"}, | ||
| 184 | {49, nullptr, "Unknown46"}, | ||
| 185 | {50, nullptr, "Unknown47"}, | ||
| 186 | {51, nullptr, "Unknown48"}, | ||
| 187 | {52, nullptr, "Unknown49"}, | ||
| 188 | {53, nullptr, "Unknown50"}, | ||
| 189 | {54, nullptr, "Unknown51"}, | ||
| 190 | {55, nullptr, "Unknown52"}, | ||
| 191 | {56, nullptr, "Unknown53"}, | ||
| 192 | {57, nullptr, "Unknown54"}, | ||
| 193 | {58, nullptr, "Unknown55"}, | ||
| 194 | {59, nullptr, "Unknown56"}, | ||
| 157 | }; | 195 | }; |
| 158 | // clang-format on | 196 | // clang-format on |
| 159 | 197 | ||
| @@ -166,7 +204,7 @@ public: | |||
| 166 | explicit BTM_DBG() : ServiceFramework{"btm:dbg"} { | 204 | explicit BTM_DBG() : ServiceFramework{"btm:dbg"} { |
| 167 | // clang-format off | 205 | // clang-format off |
| 168 | static const FunctionInfo functions[] = { | 206 | static const FunctionInfo functions[] = { |
| 169 | {0, nullptr, "RegisterSystemEventForDiscoveryImpl"}, | 207 | {0, nullptr, "RegisterSystemEventForDiscovery"}, |
| 170 | {1, nullptr, "Unknown1"}, | 208 | {1, nullptr, "Unknown1"}, |
| 171 | {2, nullptr, "Unknown2"}, | 209 | {2, nullptr, "Unknown2"}, |
| 172 | {3, nullptr, "Unknown3"}, | 210 | {3, nullptr, "Unknown3"}, |
| @@ -175,6 +213,10 @@ public: | |||
| 175 | {6, nullptr, "Unknown6"}, | 213 | {6, nullptr, "Unknown6"}, |
| 176 | {7, nullptr, "Unknown7"}, | 214 | {7, nullptr, "Unknown7"}, |
| 177 | {8, nullptr, "Unknown8"}, | 215 | {8, nullptr, "Unknown8"}, |
| 216 | {9, nullptr, "Unknown9"}, | ||
| 217 | {10, nullptr, "Unknown10"}, | ||
| 218 | {11, nullptr, "Unknown11"}, | ||
| 219 | {12, nullptr, "Unknown11"}, | ||
| 178 | }; | 220 | }; |
| 179 | // clang-format on | 221 | // clang-format on |
| 180 | 222 | ||
| @@ -187,16 +229,16 @@ public: | |||
| 187 | explicit IBtmSystemCore() : ServiceFramework{"IBtmSystemCore"} { | 229 | explicit IBtmSystemCore() : ServiceFramework{"IBtmSystemCore"} { |
| 188 | // clang-format off | 230 | // clang-format off |
| 189 | static const FunctionInfo functions[] = { | 231 | static const FunctionInfo functions[] = { |
| 190 | {0, nullptr, "StartGamepadPairingImpl"}, | 232 | {0, nullptr, "StartGamepadPairing"}, |
| 191 | {1, nullptr, "CancelGamepadPairingImpl"}, | 233 | {1, nullptr, "CancelGamepadPairing"}, |
| 192 | {2, nullptr, "ClearGamepadPairingDatabaseImpl"}, | 234 | {2, nullptr, "ClearGamepadPairingDatabase"}, |
| 193 | {3, nullptr, "GetPairedGamepadCountImpl"}, | 235 | {3, nullptr, "GetPairedGamepadCount"}, |
| 194 | {4, nullptr, "EnableRadioImpl"}, | 236 | {4, nullptr, "EnableRadio"}, |
| 195 | {5, nullptr, "DisableRadioImpl"}, | 237 | {5, nullptr, "DisableRadio"}, |
| 196 | {6, nullptr, "GetRadioOnOffImpl"}, | 238 | {6, nullptr, "GetRadioOnOff"}, |
| 197 | {7, nullptr, "AcquireRadioEventImpl"}, | 239 | {7, nullptr, "AcquireRadioEvent"}, |
| 198 | {8, nullptr, "AcquireGamepadPairingEventImpl"}, | 240 | {8, nullptr, "AcquireGamepadPairingEvent"}, |
| 199 | {9, nullptr, "IsGamepadPairingStartedImpl"}, | 241 | {9, nullptr, "IsGamepadPairingStarted"}, |
| 200 | }; | 242 | }; |
| 201 | // clang-format on | 243 | // clang-format on |
| 202 | 244 | ||
| @@ -209,7 +251,7 @@ public: | |||
| 209 | explicit BTM_SYS() : ServiceFramework{"btm:sys"} { | 251 | explicit BTM_SYS() : ServiceFramework{"btm:sys"} { |
| 210 | // clang-format off | 252 | // clang-format off |
| 211 | static const FunctionInfo functions[] = { | 253 | static const FunctionInfo functions[] = { |
| 212 | {0, &BTM_SYS::GetCoreImpl, "GetCoreImpl"}, | 254 | {0, &BTM_SYS::GetCore, "GetCore"}, |
| 213 | }; | 255 | }; |
| 214 | // clang-format on | 256 | // clang-format on |
| 215 | 257 | ||
| @@ -217,7 +259,7 @@ public: | |||
| 217 | } | 259 | } |
| 218 | 260 | ||
| 219 | private: | 261 | private: |
| 220 | void GetCoreImpl(Kernel::HLERequestContext& ctx) { | 262 | void GetCore(Kernel::HLERequestContext& ctx) { |
| 221 | LOG_DEBUG(Service_BTM, "called"); | 263 | LOG_DEBUG(Service_BTM, "called"); |
| 222 | 264 | ||
| 223 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 265 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp index 74c4e583b..54959edd8 100644 --- a/src/core/hle/service/filesystem/fsp_srv.cpp +++ b/src/core/hle/service/filesystem/fsp_srv.cpp | |||
| @@ -627,8 +627,8 @@ private: | |||
| 627 | FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") { | 627 | FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") { |
| 628 | // clang-format off | 628 | // clang-format off |
| 629 | static const FunctionInfo functions[] = { | 629 | static const FunctionInfo functions[] = { |
| 630 | {0, nullptr, "MountContent"}, | 630 | {0, nullptr, "OpenFileSystem"}, |
| 631 | {1, &FSP_SRV::Initialize, "Initialize"}, | 631 | {1, &FSP_SRV::SetCurrentProcess, "SetCurrentProcess"}, |
| 632 | {2, nullptr, "OpenDataFileSystemByCurrentProcess"}, | 632 | {2, nullptr, "OpenDataFileSystemByCurrentProcess"}, |
| 633 | {7, &FSP_SRV::OpenFileSystemWithPatch, "OpenFileSystemWithPatch"}, | 633 | {7, &FSP_SRV::OpenFileSystemWithPatch, "OpenFileSystemWithPatch"}, |
| 634 | {8, nullptr, "OpenFileSystemWithId"}, | 634 | {8, nullptr, "OpenFileSystemWithId"}, |
| @@ -637,10 +637,10 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") { | |||
| 637 | {12, nullptr, "OpenBisStorage"}, | 637 | {12, nullptr, "OpenBisStorage"}, |
| 638 | {13, nullptr, "InvalidateBisCache"}, | 638 | {13, nullptr, "InvalidateBisCache"}, |
| 639 | {17, nullptr, "OpenHostFileSystem"}, | 639 | {17, nullptr, "OpenHostFileSystem"}, |
| 640 | {18, &FSP_SRV::MountSdCard, "MountSdCard"}, | 640 | {18, &FSP_SRV::OpenSdCardFileSystem, "OpenSdCardFileSystem"}, |
| 641 | {19, nullptr, "FormatSdCardFileSystem"}, | 641 | {19, nullptr, "FormatSdCardFileSystem"}, |
| 642 | {21, nullptr, "DeleteSaveDataFileSystem"}, | 642 | {21, nullptr, "DeleteSaveDataFileSystem"}, |
| 643 | {22, &FSP_SRV::CreateSaveData, "CreateSaveData"}, | 643 | {22, &FSP_SRV::CreateSaveDataFileSystem, "CreateSaveDataFileSystem"}, |
| 644 | {23, nullptr, "CreateSaveDataFileSystemBySystemSaveDataId"}, | 644 | {23, nullptr, "CreateSaveDataFileSystemBySystemSaveDataId"}, |
| 645 | {24, nullptr, "RegisterSaveDataFileSystemAtomicDeletion"}, | 645 | {24, nullptr, "RegisterSaveDataFileSystemAtomicDeletion"}, |
| 646 | {25, nullptr, "DeleteSaveDataFileSystemBySaveDataSpaceId"}, | 646 | {25, nullptr, "DeleteSaveDataFileSystemBySaveDataSpaceId"}, |
| @@ -652,7 +652,8 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") { | |||
| 652 | {32, nullptr, "ExtendSaveDataFileSystem"}, | 652 | {32, nullptr, "ExtendSaveDataFileSystem"}, |
| 653 | {33, nullptr, "DeleteCacheStorage"}, | 653 | {33, nullptr, "DeleteCacheStorage"}, |
| 654 | {34, nullptr, "GetCacheStorageSize"}, | 654 | {34, nullptr, "GetCacheStorageSize"}, |
| 655 | {51, &FSP_SRV::MountSaveData, "MountSaveData"}, | 655 | {35, nullptr, "CreateSaveDataFileSystemByHashSalt"}, |
| 656 | {51, &FSP_SRV::OpenSaveDataFileSystem, "OpenSaveDataFileSystem"}, | ||
| 656 | {52, nullptr, "OpenSaveDataFileSystemBySystemSaveDataId"}, | 657 | {52, nullptr, "OpenSaveDataFileSystemBySystemSaveDataId"}, |
| 657 | {53, &FSP_SRV::OpenReadOnlySaveDataFileSystem, "OpenReadOnlySaveDataFileSystem"}, | 658 | {53, &FSP_SRV::OpenReadOnlySaveDataFileSystem, "OpenReadOnlySaveDataFileSystem"}, |
| 658 | {57, nullptr, "ReadSaveDataFileSystemExtraDataBySaveDataSpaceId"}, | 659 | {57, nullptr, "ReadSaveDataFileSystemExtraDataBySaveDataSpaceId"}, |
| @@ -664,21 +665,26 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") { | |||
| 664 | {64, nullptr, "OpenSaveDataInternalStorageFileSystem"}, | 665 | {64, nullptr, "OpenSaveDataInternalStorageFileSystem"}, |
| 665 | {65, nullptr, "UpdateSaveDataMacForDebug"}, | 666 | {65, nullptr, "UpdateSaveDataMacForDebug"}, |
| 666 | {66, nullptr, "WriteSaveDataFileSystemExtraData2"}, | 667 | {66, nullptr, "WriteSaveDataFileSystemExtraData2"}, |
| 668 | {67, nullptr, "FindSaveDataWithFilter"}, | ||
| 669 | {68, nullptr, "OpenSaveDataInfoReaderBySaveDataFilter"}, | ||
| 667 | {80, nullptr, "OpenSaveDataMetaFile"}, | 670 | {80, nullptr, "OpenSaveDataMetaFile"}, |
| 668 | {81, nullptr, "OpenSaveDataTransferManager"}, | 671 | {81, nullptr, "OpenSaveDataTransferManager"}, |
| 669 | {82, nullptr, "OpenSaveDataTransferManagerVersion2"}, | 672 | {82, nullptr, "OpenSaveDataTransferManagerVersion2"}, |
| 670 | {83, nullptr, "OpenSaveDataTransferProhibiterForCloudBackUp"}, | 673 | {83, nullptr, "OpenSaveDataTransferProhibiterForCloudBackUp"}, |
| 674 | {84, nullptr, "ListApplicationAccessibleSaveDataOwnerId"}, | ||
| 671 | {100, nullptr, "OpenImageDirectoryFileSystem"}, | 675 | {100, nullptr, "OpenImageDirectoryFileSystem"}, |
| 672 | {110, nullptr, "OpenContentStorageFileSystem"}, | 676 | {110, nullptr, "OpenContentStorageFileSystem"}, |
| 677 | {120, nullptr, "OpenCloudBackupWorkStorageFileSystem"}, | ||
| 673 | {200, &FSP_SRV::OpenDataStorageByCurrentProcess, "OpenDataStorageByCurrentProcess"}, | 678 | {200, &FSP_SRV::OpenDataStorageByCurrentProcess, "OpenDataStorageByCurrentProcess"}, |
| 674 | {201, nullptr, "OpenDataStorageByProgramId"}, | 679 | {201, nullptr, "OpenDataStorageByProgramId"}, |
| 675 | {202, &FSP_SRV::OpenDataStorageByDataId, "OpenDataStorageByDataId"}, | 680 | {202, &FSP_SRV::OpenDataStorageByDataId, "OpenDataStorageByDataId"}, |
| 676 | {203, &FSP_SRV::OpenRomStorage, "OpenRomStorage"}, | 681 | {203, &FSP_SRV::OpenPatchDataStorageByCurrentProcess, "OpenPatchDataStorageByCurrentProcess"}, |
| 677 | {400, nullptr, "OpenDeviceOperator"}, | 682 | {400, nullptr, "OpenDeviceOperator"}, |
| 678 | {500, nullptr, "OpenSdCardDetectionEventNotifier"}, | 683 | {500, nullptr, "OpenSdCardDetectionEventNotifier"}, |
| 679 | {501, nullptr, "OpenGameCardDetectionEventNotifier"}, | 684 | {501, nullptr, "OpenGameCardDetectionEventNotifier"}, |
| 680 | {510, nullptr, "OpenSystemDataUpdateEventNotifier"}, | 685 | {510, nullptr, "OpenSystemDataUpdateEventNotifier"}, |
| 681 | {511, nullptr, "NotifySystemDataUpdateEvent"}, | 686 | {511, nullptr, "NotifySystemDataUpdateEvent"}, |
| 687 | {520, nullptr, "SimulateGameCardDetectionEvent"}, | ||
| 682 | {600, nullptr, "SetCurrentPosixTime"}, | 688 | {600, nullptr, "SetCurrentPosixTime"}, |
| 683 | {601, nullptr, "QuerySaveDataTotalSize"}, | 689 | {601, nullptr, "QuerySaveDataTotalSize"}, |
| 684 | {602, nullptr, "VerifySaveDataFileSystem"}, | 690 | {602, nullptr, "VerifySaveDataFileSystem"}, |
| @@ -717,6 +723,8 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") { | |||
| 717 | {1008, nullptr, "OpenRegisteredUpdatePartition"}, | 723 | {1008, nullptr, "OpenRegisteredUpdatePartition"}, |
| 718 | {1009, nullptr, "GetAndClearMemoryReportInfo"}, | 724 | {1009, nullptr, "GetAndClearMemoryReportInfo"}, |
| 719 | {1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"}, | 725 | {1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"}, |
| 726 | {1110, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId2"}, | ||
| 727 | {1200, nullptr, "OpenMultiCommitManager"}, | ||
| 720 | }; | 728 | }; |
| 721 | // clang-format on | 729 | // clang-format on |
| 722 | RegisterHandlers(functions); | 730 | RegisterHandlers(functions); |
| @@ -724,7 +732,7 @@ FSP_SRV::FSP_SRV() : ServiceFramework("fsp-srv") { | |||
| 724 | 732 | ||
| 725 | FSP_SRV::~FSP_SRV() = default; | 733 | FSP_SRV::~FSP_SRV() = default; |
| 726 | 734 | ||
| 727 | void FSP_SRV::Initialize(Kernel::HLERequestContext& ctx) { | 735 | void FSP_SRV::SetCurrentProcess(Kernel::HLERequestContext& ctx) { |
| 728 | LOG_WARNING(Service_FS, "(STUBBED) called"); | 736 | LOG_WARNING(Service_FS, "(STUBBED) called"); |
| 729 | 737 | ||
| 730 | IPC::ResponseBuilder rb{ctx, 2}; | 738 | IPC::ResponseBuilder rb{ctx, 2}; |
| @@ -743,7 +751,7 @@ void FSP_SRV::OpenFileSystemWithPatch(Kernel::HLERequestContext& ctx) { | |||
| 743 | rb.Push(ResultCode(-1)); | 751 | rb.Push(ResultCode(-1)); |
| 744 | } | 752 | } |
| 745 | 753 | ||
| 746 | void FSP_SRV::MountSdCard(Kernel::HLERequestContext& ctx) { | 754 | void FSP_SRV::OpenSdCardFileSystem(Kernel::HLERequestContext& ctx) { |
| 747 | LOG_DEBUG(Service_FS, "called"); | 755 | LOG_DEBUG(Service_FS, "called"); |
| 748 | 756 | ||
| 749 | IFileSystem filesystem(OpenSDMC().Unwrap()); | 757 | IFileSystem filesystem(OpenSDMC().Unwrap()); |
| @@ -753,7 +761,7 @@ void FSP_SRV::MountSdCard(Kernel::HLERequestContext& ctx) { | |||
| 753 | rb.PushIpcInterface<IFileSystem>(std::move(filesystem)); | 761 | rb.PushIpcInterface<IFileSystem>(std::move(filesystem)); |
| 754 | } | 762 | } |
| 755 | 763 | ||
| 756 | void FSP_SRV::CreateSaveData(Kernel::HLERequestContext& ctx) { | 764 | void FSP_SRV::CreateSaveDataFileSystem(Kernel::HLERequestContext& ctx) { |
| 757 | IPC::RequestParser rp{ctx}; | 765 | IPC::RequestParser rp{ctx}; |
| 758 | 766 | ||
| 759 | auto save_struct = rp.PopRaw<FileSys::SaveDataDescriptor>(); | 767 | auto save_struct = rp.PopRaw<FileSys::SaveDataDescriptor>(); |
| @@ -767,7 +775,7 @@ void FSP_SRV::CreateSaveData(Kernel::HLERequestContext& ctx) { | |||
| 767 | rb.Push(RESULT_SUCCESS); | 775 | rb.Push(RESULT_SUCCESS); |
| 768 | } | 776 | } |
| 769 | 777 | ||
| 770 | void FSP_SRV::MountSaveData(Kernel::HLERequestContext& ctx) { | 778 | void FSP_SRV::OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx) { |
| 771 | IPC::RequestParser rp{ctx}; | 779 | IPC::RequestParser rp{ctx}; |
| 772 | 780 | ||
| 773 | auto space_id = rp.PopRaw<FileSys::SaveDataSpaceId>(); | 781 | auto space_id = rp.PopRaw<FileSys::SaveDataSpaceId>(); |
| @@ -793,7 +801,7 @@ void FSP_SRV::MountSaveData(Kernel::HLERequestContext& ctx) { | |||
| 793 | 801 | ||
| 794 | void FSP_SRV::OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx) { | 802 | void FSP_SRV::OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx) { |
| 795 | LOG_WARNING(Service_FS, "(STUBBED) called, delegating to 51 OpenSaveDataFilesystem"); | 803 | LOG_WARNING(Service_FS, "(STUBBED) called, delegating to 51 OpenSaveDataFilesystem"); |
| 796 | MountSaveData(ctx); | 804 | OpenSaveDataFileSystem(ctx); |
| 797 | } | 805 | } |
| 798 | 806 | ||
| 799 | void FSP_SRV::OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx) { | 807 | void FSP_SRV::OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx) { |
| @@ -881,7 +889,7 @@ void FSP_SRV::OpenDataStorageByDataId(Kernel::HLERequestContext& ctx) { | |||
| 881 | rb.PushIpcInterface<IStorage>(std::move(storage)); | 889 | rb.PushIpcInterface<IStorage>(std::move(storage)); |
| 882 | } | 890 | } |
| 883 | 891 | ||
| 884 | void FSP_SRV::OpenRomStorage(Kernel::HLERequestContext& ctx) { | 892 | void FSP_SRV::OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx) { |
| 885 | IPC::RequestParser rp{ctx}; | 893 | IPC::RequestParser rp{ctx}; |
| 886 | 894 | ||
| 887 | auto storage_id = rp.PopRaw<FileSys::StorageId>(); | 895 | auto storage_id = rp.PopRaw<FileSys::StorageId>(); |
diff --git a/src/core/hle/service/filesystem/fsp_srv.h b/src/core/hle/service/filesystem/fsp_srv.h index e7abec0a3..3a5f4e200 100644 --- a/src/core/hle/service/filesystem/fsp_srv.h +++ b/src/core/hle/service/filesystem/fsp_srv.h | |||
| @@ -19,17 +19,17 @@ public: | |||
| 19 | ~FSP_SRV() override; | 19 | ~FSP_SRV() override; |
| 20 | 20 | ||
| 21 | private: | 21 | private: |
| 22 | void Initialize(Kernel::HLERequestContext& ctx); | 22 | void SetCurrentProcess(Kernel::HLERequestContext& ctx); |
| 23 | void OpenFileSystemWithPatch(Kernel::HLERequestContext& ctx); | 23 | void OpenFileSystemWithPatch(Kernel::HLERequestContext& ctx); |
| 24 | void MountSdCard(Kernel::HLERequestContext& ctx); | 24 | void OpenSdCardFileSystem(Kernel::HLERequestContext& ctx); |
| 25 | void CreateSaveData(Kernel::HLERequestContext& ctx); | 25 | void CreateSaveDataFileSystem(Kernel::HLERequestContext& ctx); |
| 26 | void MountSaveData(Kernel::HLERequestContext& ctx); | 26 | void OpenSaveDataFileSystem(Kernel::HLERequestContext& ctx); |
| 27 | void OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx); | 27 | void OpenReadOnlySaveDataFileSystem(Kernel::HLERequestContext& ctx); |
| 28 | void OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx); | 28 | void OpenSaveDataInfoReaderBySaveDataSpaceId(Kernel::HLERequestContext& ctx); |
| 29 | void GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx); | 29 | void GetGlobalAccessLogMode(Kernel::HLERequestContext& ctx); |
| 30 | void OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); | 30 | void OpenDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); |
| 31 | void OpenDataStorageByDataId(Kernel::HLERequestContext& ctx); | 31 | void OpenDataStorageByDataId(Kernel::HLERequestContext& ctx); |
| 32 | void OpenRomStorage(Kernel::HLERequestContext& ctx); | 32 | void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); |
| 33 | 33 | ||
| 34 | FileSys::VirtualFile romfs; | 34 | FileSys::VirtualFile romfs; |
| 35 | }; | 35 | }; |
diff --git a/src/core/hle/service/ncm/ncm.cpp b/src/core/hle/service/ncm/ncm.cpp index 0297edca0..5d31f638f 100644 --- a/src/core/hle/service/ncm/ncm.cpp +++ b/src/core/hle/service/ncm/ncm.cpp | |||
| @@ -40,10 +40,10 @@ public: | |||
| 40 | {6, nullptr, "CloseContentStorageForcibly"}, | 40 | {6, nullptr, "CloseContentStorageForcibly"}, |
| 41 | {7, nullptr, "CloseContentMetaDatabaseForcibly"}, | 41 | {7, nullptr, "CloseContentMetaDatabaseForcibly"}, |
| 42 | {8, nullptr, "CleanupContentMetaDatabase"}, | 42 | {8, nullptr, "CleanupContentMetaDatabase"}, |
| 43 | {9, nullptr, "OpenContentStorage2"}, | 43 | {9, nullptr, "ActivateContentStorage"}, |
| 44 | {10, nullptr, "CloseContentStorage"}, | 44 | {10, nullptr, "InactivateContentStorage"}, |
| 45 | {11, nullptr, "OpenContentMetaDatabase2"}, | 45 | {11, nullptr, "ActivateContentMetaDatabase"}, |
| 46 | {12, nullptr, "CloseContentMetaDatabase"}, | 46 | {12, nullptr, "InactivateContentMetaDatabase"}, |
| 47 | }; | 47 | }; |
| 48 | // clang-format on | 48 | // clang-format on |
| 49 | 49 | ||
diff --git a/src/core/hle/service/ns/ns.cpp b/src/core/hle/service/ns/ns.cpp index 2663f56b1..0eb04037a 100644 --- a/src/core/hle/service/ns/ns.cpp +++ b/src/core/hle/service/ns/ns.cpp | |||
| @@ -43,7 +43,7 @@ public: | |||
| 43 | {11, nullptr, "CalculateApplicationOccupiedSize"}, | 43 | {11, nullptr, "CalculateApplicationOccupiedSize"}, |
| 44 | {16, nullptr, "PushApplicationRecord"}, | 44 | {16, nullptr, "PushApplicationRecord"}, |
| 45 | {17, nullptr, "ListApplicationRecordContentMeta"}, | 45 | {17, nullptr, "ListApplicationRecordContentMeta"}, |
| 46 | {19, nullptr, "LaunchApplication"}, | 46 | {19, nullptr, "LaunchApplicationOld"}, |
| 47 | {21, nullptr, "GetApplicationContentPath"}, | 47 | {21, nullptr, "GetApplicationContentPath"}, |
| 48 | {22, nullptr, "TerminateApplication"}, | 48 | {22, nullptr, "TerminateApplication"}, |
| 49 | {23, nullptr, "ResolveApplicationContentPath"}, | 49 | {23, nullptr, "ResolveApplicationContentPath"}, |
| @@ -96,10 +96,10 @@ public: | |||
| 96 | {86, nullptr, "EnableApplicationCrashReport"}, | 96 | {86, nullptr, "EnableApplicationCrashReport"}, |
| 97 | {87, nullptr, "IsApplicationCrashReportEnabled"}, | 97 | {87, nullptr, "IsApplicationCrashReportEnabled"}, |
| 98 | {90, nullptr, "BoostSystemMemoryResourceLimit"}, | 98 | {90, nullptr, "BoostSystemMemoryResourceLimit"}, |
| 99 | {91, nullptr, "Unknown1"}, | 99 | {91, nullptr, "DeprecatedLaunchApplication"}, |
| 100 | {92, nullptr, "Unknown2"}, | 100 | {92, nullptr, "GetRunningApplicationProgramId"}, |
| 101 | {93, nullptr, "GetMainApplicationProgramIndex"}, | 101 | {93, nullptr, "GetMainApplicationProgramIndex"}, |
| 102 | {94, nullptr, "LaunchApplication2"}, | 102 | {94, nullptr, "LaunchApplication"}, |
| 103 | {95, nullptr, "GetApplicationLaunchInfo"}, | 103 | {95, nullptr, "GetApplicationLaunchInfo"}, |
| 104 | {96, nullptr, "AcquireApplicationLaunchInfo"}, | 104 | {96, nullptr, "AcquireApplicationLaunchInfo"}, |
| 105 | {97, nullptr, "GetMainApplicationProgramIndex2"}, | 105 | {97, nullptr, "GetMainApplicationProgramIndex2"}, |
| @@ -163,7 +163,7 @@ public: | |||
| 163 | {907, nullptr, "WithdrawApplicationUpdateRequest"}, | 163 | {907, nullptr, "WithdrawApplicationUpdateRequest"}, |
| 164 | {908, nullptr, "ListApplicationRecordInstalledContentMeta"}, | 164 | {908, nullptr, "ListApplicationRecordInstalledContentMeta"}, |
| 165 | {909, nullptr, "WithdrawCleanupAddOnContentsWithNoRightsRecommendation"}, | 165 | {909, nullptr, "WithdrawCleanupAddOnContentsWithNoRightsRecommendation"}, |
| 166 | {910, nullptr, "Unknown3"}, | 166 | {910, nullptr, "HasApplicationRecord"}, |
| 167 | {911, nullptr, "SetPreInstalledApplication"}, | 167 | {911, nullptr, "SetPreInstalledApplication"}, |
| 168 | {912, nullptr, "ClearPreInstalledApplicationFlag"}, | 168 | {912, nullptr, "ClearPreInstalledApplicationFlag"}, |
| 169 | {1000, nullptr, "RequestVerifyApplicationDeprecated"}, | 169 | {1000, nullptr, "RequestVerifyApplicationDeprecated"}, |
| @@ -219,10 +219,10 @@ public: | |||
| 219 | {2015, nullptr, "CompareSystemDeliveryInfo"}, | 219 | {2015, nullptr, "CompareSystemDeliveryInfo"}, |
| 220 | {2016, nullptr, "ListNotCommittedContentMeta"}, | 220 | {2016, nullptr, "ListNotCommittedContentMeta"}, |
| 221 | {2017, nullptr, "CreateDownloadTask"}, | 221 | {2017, nullptr, "CreateDownloadTask"}, |
| 222 | {2018, nullptr, "Unknown4"}, | 222 | {2018, nullptr, "GetApplicationDeliveryInfoHash"}, |
| 223 | {2050, nullptr, "Unknown5"}, | 223 | {2050, nullptr, "GetApplicationRightsOnClient"}, |
| 224 | {2100, nullptr, "Unknown6"}, | 224 | {2100, nullptr, "GetApplicationTerminateResult"}, |
| 225 | {2101, nullptr, "Unknown7"}, | 225 | {2101, nullptr, "GetRawApplicationTerminateResult"}, |
| 226 | {2150, nullptr, "CreateRightsEnvironment"}, | 226 | {2150, nullptr, "CreateRightsEnvironment"}, |
| 227 | {2151, nullptr, "DestroyRightsEnvironment"}, | 227 | {2151, nullptr, "DestroyRightsEnvironment"}, |
| 228 | {2152, nullptr, "ActivateRightsEnvironment"}, | 228 | {2152, nullptr, "ActivateRightsEnvironment"}, |
| @@ -237,10 +237,10 @@ public: | |||
| 237 | {2182, nullptr, "SetActiveRightsContextUsingStateToRightsEnvironment"}, | 237 | {2182, nullptr, "SetActiveRightsContextUsingStateToRightsEnvironment"}, |
| 238 | {2190, nullptr, "GetRightsEnvironmentHandleForApplication"}, | 238 | {2190, nullptr, "GetRightsEnvironmentHandleForApplication"}, |
| 239 | {2199, nullptr, "GetRightsEnvironmentCountForDebug"}, | 239 | {2199, nullptr, "GetRightsEnvironmentCountForDebug"}, |
| 240 | {2200, nullptr, "Unknown8"}, | 240 | {2200, nullptr, "GetGameCardApplicationCopyIdentifier"}, |
| 241 | {2201, nullptr, "Unknown9"}, | 241 | {2201, nullptr, "GetInstalledApplicationCopyIdentifier"}, |
| 242 | {2250, nullptr, "Unknown10"}, | 242 | {2250, nullptr, "RequestReportActiveELicence"}, |
| 243 | {2300, nullptr, "Unknown11"}, | 243 | {2300, nullptr, "ListEventLog"}, |
| 244 | }; | 244 | }; |
| 245 | // clang-format on | 245 | // clang-format on |
| 246 | 246 | ||
| @@ -355,6 +355,7 @@ public: | |||
| 355 | static const FunctionInfo functions[] = { | 355 | static const FunctionInfo functions[] = { |
| 356 | {21, nullptr, "GetApplicationContentPath"}, | 356 | {21, nullptr, "GetApplicationContentPath"}, |
| 357 | {23, nullptr, "ResolveApplicationContentPath"}, | 357 | {23, nullptr, "ResolveApplicationContentPath"}, |
| 358 | {93, nullptr, "GetRunningApplicationProgramId"}, | ||
| 358 | }; | 359 | }; |
| 359 | // clang-format on | 360 | // clang-format on |
| 360 | 361 | ||
| @@ -389,6 +390,11 @@ public: | |||
| 389 | // clang-format off | 390 | // clang-format off |
| 390 | static const FunctionInfo functions[] = { | 391 | static const FunctionInfo functions[] = { |
| 391 | {0, nullptr, "RequestLinkDevice"}, | 392 | {0, nullptr, "RequestLinkDevice"}, |
| 393 | {1, nullptr, "RequestCleanupAllPreInstalledApplications"}, | ||
| 394 | {2, nullptr, "RequestCleanupPreInstalledApplication"}, | ||
| 395 | {3, nullptr, "RequestSyncRights"}, | ||
| 396 | {4, nullptr, "RequestUnlinkDevice"}, | ||
| 397 | {5, nullptr, "RequestRevokeAllELicense"}, | ||
| 392 | }; | 398 | }; |
| 393 | // clang-format on | 399 | // clang-format on |
| 394 | 400 | ||
| @@ -403,7 +409,7 @@ public: | |||
| 403 | static const FunctionInfo functions[] = { | 409 | static const FunctionInfo functions[] = { |
| 404 | {100, nullptr, "ResetToFactorySettings"}, | 410 | {100, nullptr, "ResetToFactorySettings"}, |
| 405 | {101, nullptr, "ResetToFactorySettingsWithoutUserSaveData"}, | 411 | {101, nullptr, "ResetToFactorySettingsWithoutUserSaveData"}, |
| 406 | {102, nullptr, "ResetToFactorySettingsForRefurbishment "}, | 412 | {102, nullptr, "ResetToFactorySettingsForRefurbishment"}, |
| 407 | }; | 413 | }; |
| 408 | // clang-format on | 414 | // clang-format on |
| 409 | 415 | ||
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 6a613aeab..cde06916d 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <optional> | 6 | #include <optional> |
| 7 | 7 | ||
| 8 | #include "common/alignment.h" | ||
| 9 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 10 | #include "common/logging/log.h" | 9 | #include "common/logging/log.h" |
| 11 | #include "common/microprofile.h" | 10 | #include "common/microprofile.h" |
| @@ -22,7 +21,6 @@ | |||
| 22 | #include "core/hle/service/nvflinger/nvflinger.h" | 21 | #include "core/hle/service/nvflinger/nvflinger.h" |
| 23 | #include "core/perf_stats.h" | 22 | #include "core/perf_stats.h" |
| 24 | #include "video_core/renderer_base.h" | 23 | #include "video_core/renderer_base.h" |
| 25 | #include "video_core/video_core.h" | ||
| 26 | 24 | ||
| 27 | namespace Service::NVFlinger { | 25 | namespace Service::NVFlinger { |
| 28 | 26 | ||
| @@ -30,12 +28,6 @@ constexpr std::size_t SCREEN_REFRESH_RATE = 60; | |||
| 30 | constexpr u64 frame_ticks = static_cast<u64>(CoreTiming::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE); | 28 | constexpr u64 frame_ticks = static_cast<u64>(CoreTiming::BASE_CLOCK_RATE / SCREEN_REFRESH_RATE); |
| 31 | 29 | ||
| 32 | NVFlinger::NVFlinger() { | 30 | NVFlinger::NVFlinger() { |
| 33 | // Add the different displays to the list of displays. | ||
| 34 | displays.emplace_back(0, "Default"); | ||
| 35 | displays.emplace_back(1, "External"); | ||
| 36 | displays.emplace_back(2, "Edid"); | ||
| 37 | displays.emplace_back(3, "Internal"); | ||
| 38 | |||
| 39 | // Schedule the screen composition events | 31 | // Schedule the screen composition events |
| 40 | composition_event = | 32 | composition_event = |
| 41 | CoreTiming::RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) { | 33 | CoreTiming::RegisterEvent("ScreenComposition", [this](u64 userdata, int cycles_late) { |
| @@ -54,66 +46,120 @@ void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) { | |||
| 54 | nvdrv = std::move(instance); | 46 | nvdrv = std::move(instance); |
| 55 | } | 47 | } |
| 56 | 48 | ||
| 57 | u64 NVFlinger::OpenDisplay(std::string_view name) { | 49 | std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) { |
| 58 | LOG_WARNING(Service, "Opening display {}", name); | 50 | LOG_DEBUG(Service, "Opening \"{}\" display", name); |
| 59 | 51 | ||
| 60 | // TODO(Subv): Currently we only support the Default display. | 52 | // TODO(Subv): Currently we only support the Default display. |
| 61 | ASSERT(name == "Default"); | 53 | ASSERT(name == "Default"); |
| 62 | 54 | ||
| 63 | auto itr = std::find_if(displays.begin(), displays.end(), | 55 | const auto itr = std::find_if(displays.begin(), displays.end(), |
| 64 | [&](const Display& display) { return display.name == name; }); | 56 | [&](const Display& display) { return display.name == name; }); |
| 65 | 57 | if (itr == displays.end()) { | |
| 66 | ASSERT(itr != displays.end()); | 58 | return {}; |
| 59 | } | ||
| 67 | 60 | ||
| 68 | return itr->id; | 61 | return itr->id; |
| 69 | } | 62 | } |
| 70 | 63 | ||
| 71 | u64 NVFlinger::CreateLayer(u64 display_id) { | 64 | std::optional<u64> NVFlinger::CreateLayer(u64 display_id) { |
| 72 | auto& display = GetDisplay(display_id); | 65 | auto* const display = FindDisplay(display_id); |
| 66 | |||
| 67 | if (display == nullptr) { | ||
| 68 | return {}; | ||
| 69 | } | ||
| 73 | 70 | ||
| 74 | ASSERT_MSG(display.layers.empty(), "Only one layer is supported per display at the moment"); | 71 | ASSERT_MSG(display->layers.empty(), "Only one layer is supported per display at the moment"); |
| 75 | 72 | ||
| 76 | u64 layer_id = next_layer_id++; | 73 | const u64 layer_id = next_layer_id++; |
| 77 | u32 buffer_queue_id = next_buffer_queue_id++; | 74 | const u32 buffer_queue_id = next_buffer_queue_id++; |
| 78 | auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id); | 75 | auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id); |
| 79 | display.layers.emplace_back(layer_id, buffer_queue); | 76 | display->layers.emplace_back(layer_id, buffer_queue); |
| 80 | buffer_queues.emplace_back(std::move(buffer_queue)); | 77 | buffer_queues.emplace_back(std::move(buffer_queue)); |
| 81 | return layer_id; | 78 | return layer_id; |
| 82 | } | 79 | } |
| 83 | 80 | ||
| 84 | u32 NVFlinger::GetBufferQueueId(u64 display_id, u64 layer_id) { | 81 | std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const { |
| 85 | const auto& layer = GetLayer(display_id, layer_id); | 82 | const auto* const layer = FindLayer(display_id, layer_id); |
| 86 | return layer.buffer_queue->GetId(); | 83 | |
| 84 | if (layer == nullptr) { | ||
| 85 | return {}; | ||
| 86 | } | ||
| 87 | |||
| 88 | return layer->buffer_queue->GetId(); | ||
| 87 | } | 89 | } |
| 88 | 90 | ||
| 89 | Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::GetVsyncEvent(u64 display_id) { | 91 | Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const { |
| 90 | return GetDisplay(display_id).vsync_event.readable; | 92 | auto* const display = FindDisplay(display_id); |
| 93 | |||
| 94 | if (display == nullptr) { | ||
| 95 | return nullptr; | ||
| 96 | } | ||
| 97 | |||
| 98 | return display->vsync_event.readable; | ||
| 91 | } | 99 | } |
| 92 | 100 | ||
| 93 | std::shared_ptr<BufferQueue> NVFlinger::GetBufferQueue(u32 id) const { | 101 | std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const { |
| 94 | auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(), | 102 | const auto itr = std::find_if(buffer_queues.begin(), buffer_queues.end(), |
| 95 | [&](const auto& queue) { return queue->GetId() == id; }); | 103 | [&](const auto& queue) { return queue->GetId() == id; }); |
| 96 | 104 | ||
| 97 | ASSERT(itr != buffer_queues.end()); | 105 | ASSERT(itr != buffer_queues.end()); |
| 98 | return *itr; | 106 | return *itr; |
| 99 | } | 107 | } |
| 100 | 108 | ||
| 101 | Display& NVFlinger::GetDisplay(u64 display_id) { | 109 | Display* NVFlinger::FindDisplay(u64 display_id) { |
| 102 | auto itr = std::find_if(displays.begin(), displays.end(), | 110 | const auto itr = std::find_if(displays.begin(), displays.end(), |
| 103 | [&](const Display& display) { return display.id == display_id; }); | 111 | [&](const Display& display) { return display.id == display_id; }); |
| 104 | 112 | ||
| 105 | ASSERT(itr != displays.end()); | 113 | if (itr == displays.end()) { |
| 106 | return *itr; | 114 | return nullptr; |
| 115 | } | ||
| 116 | |||
| 117 | return &*itr; | ||
| 107 | } | 118 | } |
| 108 | 119 | ||
| 109 | Layer& NVFlinger::GetLayer(u64 display_id, u64 layer_id) { | 120 | const Display* NVFlinger::FindDisplay(u64 display_id) const { |
| 110 | auto& display = GetDisplay(display_id); | 121 | const auto itr = std::find_if(displays.begin(), displays.end(), |
| 122 | [&](const Display& display) { return display.id == display_id; }); | ||
| 111 | 123 | ||
| 112 | auto itr = std::find_if(display.layers.begin(), display.layers.end(), | 124 | if (itr == displays.end()) { |
| 113 | [&](const Layer& layer) { return layer.id == layer_id; }); | 125 | return nullptr; |
| 126 | } | ||
| 114 | 127 | ||
| 115 | ASSERT(itr != display.layers.end()); | 128 | return &*itr; |
| 116 | return *itr; | 129 | } |
| 130 | |||
| 131 | Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) { | ||
| 132 | auto* const display = FindDisplay(display_id); | ||
| 133 | |||
| 134 | if (display == nullptr) { | ||
| 135 | return nullptr; | ||
| 136 | } | ||
| 137 | |||
| 138 | const auto itr = std::find_if(display->layers.begin(), display->layers.end(), | ||
| 139 | [&](const Layer& layer) { return layer.id == layer_id; }); | ||
| 140 | |||
| 141 | if (itr == display->layers.end()) { | ||
| 142 | return nullptr; | ||
| 143 | } | ||
| 144 | |||
| 145 | return &*itr; | ||
| 146 | } | ||
| 147 | |||
| 148 | const Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const { | ||
| 149 | const auto* const display = FindDisplay(display_id); | ||
| 150 | |||
| 151 | if (display == nullptr) { | ||
| 152 | return nullptr; | ||
| 153 | } | ||
| 154 | |||
| 155 | const auto itr = std::find_if(display->layers.begin(), display->layers.end(), | ||
| 156 | [&](const Layer& layer) { return layer.id == layer_id; }); | ||
| 157 | |||
| 158 | if (itr == display->layers.end()) { | ||
| 159 | return nullptr; | ||
| 160 | } | ||
| 161 | |||
| 162 | return &*itr; | ||
| 117 | } | 163 | } |
| 118 | 164 | ||
| 119 | void NVFlinger::Compose() { | 165 | void NVFlinger::Compose() { |
| @@ -145,7 +191,7 @@ void NVFlinger::Compose() { | |||
| 145 | continue; | 191 | continue; |
| 146 | } | 192 | } |
| 147 | 193 | ||
| 148 | auto& igbp_buffer = buffer->get().igbp_buffer; | 194 | const auto& igbp_buffer = buffer->get().igbp_buffer; |
| 149 | 195 | ||
| 150 | // Now send the buffer to the GPU for drawing. | 196 | // Now send the buffer to the GPU for drawing. |
| 151 | // TODO(Subv): Support more than just disp0. The display device selection is probably based | 197 | // TODO(Subv): Support more than just disp0. The display device selection is probably based |
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h index 9abba555b..4c55e99f4 100644 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h | |||
| @@ -4,7 +4,9 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 7 | #include <memory> | 8 | #include <memory> |
| 9 | #include <optional> | ||
| 8 | #include <string> | 10 | #include <string> |
| 9 | #include <string_view> | 11 | #include <string_view> |
| 10 | #include <vector> | 12 | #include <vector> |
| @@ -56,35 +58,55 @@ public: | |||
| 56 | /// Sets the NVDrv module instance to use to send buffers to the GPU. | 58 | /// Sets the NVDrv module instance to use to send buffers to the GPU. |
| 57 | void SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance); | 59 | void SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance); |
| 58 | 60 | ||
| 59 | /// Opens the specified display and returns the id. | 61 | /// Opens the specified display and returns the ID. |
| 60 | u64 OpenDisplay(std::string_view name); | 62 | /// |
| 63 | /// If an invalid display name is provided, then an empty optional is returned. | ||
| 64 | std::optional<u64> OpenDisplay(std::string_view name); | ||
| 61 | 65 | ||
| 62 | /// Creates a layer on the specified display and returns the layer id. | 66 | /// Creates a layer on the specified display and returns the layer ID. |
| 63 | u64 CreateLayer(u64 display_id); | 67 | /// |
| 68 | /// If an invalid display ID is specified, then an empty optional is returned. | ||
| 69 | std::optional<u64> CreateLayer(u64 display_id); | ||
| 64 | 70 | ||
| 65 | /// Gets the buffer queue id of the specified layer in the specified display. | 71 | /// Finds the buffer queue ID of the specified layer in the specified display. |
| 66 | u32 GetBufferQueueId(u64 display_id, u64 layer_id); | 72 | /// |
| 73 | /// If an invalid display ID or layer ID is provided, then an empty optional is returned. | ||
| 74 | std::optional<u32> FindBufferQueueId(u64 display_id, u64 layer_id) const; | ||
| 67 | 75 | ||
| 68 | /// Gets the vsync event for the specified display. | 76 | /// Gets the vsync event for the specified display. |
| 69 | Kernel::SharedPtr<Kernel::ReadableEvent> GetVsyncEvent(u64 display_id); | 77 | /// |
| 78 | /// If an invalid display ID is provided, then nullptr is returned. | ||
| 79 | Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const; | ||
| 70 | 80 | ||
| 71 | /// Obtains a buffer queue identified by the id. | 81 | /// Obtains a buffer queue identified by the ID. |
| 72 | std::shared_ptr<BufferQueue> GetBufferQueue(u32 id) const; | 82 | std::shared_ptr<BufferQueue> FindBufferQueue(u32 id) const; |
| 73 | 83 | ||
| 74 | /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when | 84 | /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when |
| 75 | /// finished. | 85 | /// finished. |
| 76 | void Compose(); | 86 | void Compose(); |
| 77 | 87 | ||
| 78 | private: | 88 | private: |
| 79 | /// Returns the display identified by the specified id. | 89 | /// Finds the display identified by the specified ID. |
| 80 | Display& GetDisplay(u64 display_id); | 90 | Display* FindDisplay(u64 display_id); |
| 81 | 91 | ||
| 82 | /// Returns the layer identified by the specified id in the desired display. | 92 | /// Finds the display identified by the specified ID. |
| 83 | Layer& GetLayer(u64 display_id, u64 layer_id); | 93 | const Display* FindDisplay(u64 display_id) const; |
| 94 | |||
| 95 | /// Finds the layer identified by the specified ID in the desired display. | ||
| 96 | Layer* FindLayer(u64 display_id, u64 layer_id); | ||
| 97 | |||
| 98 | /// Finds the layer identified by the specified ID in the desired display. | ||
| 99 | const Layer* FindLayer(u64 display_id, u64 layer_id) const; | ||
| 84 | 100 | ||
| 85 | std::shared_ptr<Nvidia::Module> nvdrv; | 101 | std::shared_ptr<Nvidia::Module> nvdrv; |
| 86 | 102 | ||
| 87 | std::vector<Display> displays; | 103 | std::array<Display, 5> displays{{ |
| 104 | {0, "Default"}, | ||
| 105 | {1, "External"}, | ||
| 106 | {2, "Edid"}, | ||
| 107 | {3, "Internal"}, | ||
| 108 | {4, "Null"}, | ||
| 109 | }}; | ||
| 88 | std::vector<std::shared_ptr<BufferQueue>> buffer_queues; | 110 | std::vector<std::shared_ptr<BufferQueue>> buffer_queues; |
| 89 | 111 | ||
| 90 | /// Id to use for the next layer that is created, this counter is shared among all displays. | 112 | /// Id to use for the next layer that is created, this counter is shared among all displays. |
diff --git a/src/core/hle/service/pm/pm.cpp b/src/core/hle/service/pm/pm.cpp index 53e7da9c3..6b27dc4a3 100644 --- a/src/core/hle/service/pm/pm.cpp +++ b/src/core/hle/service/pm/pm.cpp | |||
| @@ -13,7 +13,7 @@ public: | |||
| 13 | explicit BootMode() : ServiceFramework{"pm:bm"} { | 13 | explicit BootMode() : ServiceFramework{"pm:bm"} { |
| 14 | static const FunctionInfo functions[] = { | 14 | static const FunctionInfo functions[] = { |
| 15 | {0, &BootMode::GetBootMode, "GetBootMode"}, | 15 | {0, &BootMode::GetBootMode, "GetBootMode"}, |
| 16 | {1, nullptr, "SetMaintenanceBoot"}, | 16 | {1, &BootMode::SetMaintenanceBoot, "SetMaintenanceBoot"}, |
| 17 | }; | 17 | }; |
| 18 | RegisterHandlers(functions); | 18 | RegisterHandlers(functions); |
| 19 | } | 19 | } |
| @@ -24,8 +24,19 @@ private: | |||
| 24 | 24 | ||
| 25 | IPC::ResponseBuilder rb{ctx, 3}; | 25 | IPC::ResponseBuilder rb{ctx, 3}; |
| 26 | rb.Push(RESULT_SUCCESS); | 26 | rb.Push(RESULT_SUCCESS); |
| 27 | rb.Push<u32>(static_cast<u32>(SystemBootMode::Normal)); // Normal boot mode | 27 | rb.PushEnum(boot_mode); |
| 28 | } | 28 | } |
| 29 | |||
| 30 | void SetMaintenanceBoot(Kernel::HLERequestContext& ctx) { | ||
| 31 | LOG_DEBUG(Service_PM, "called"); | ||
| 32 | |||
| 33 | boot_mode = SystemBootMode::Maintenance; | ||
| 34 | |||
| 35 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 36 | rb.Push(RESULT_SUCCESS); | ||
| 37 | } | ||
| 38 | |||
| 39 | SystemBootMode boot_mode = SystemBootMode::Normal; | ||
| 29 | }; | 40 | }; |
| 30 | 41 | ||
| 31 | class DebugMonitor final : public ServiceFramework<DebugMonitor> { | 42 | class DebugMonitor final : public ServiceFramework<DebugMonitor> { |
diff --git a/src/core/hle/service/pm/pm.h b/src/core/hle/service/pm/pm.h index 370f2ed72..cc8d3f215 100644 --- a/src/core/hle/service/pm/pm.h +++ b/src/core/hle/service/pm/pm.h | |||
| @@ -9,7 +9,12 @@ class ServiceManager; | |||
| 9 | } | 9 | } |
| 10 | 10 | ||
| 11 | namespace Service::PM { | 11 | namespace Service::PM { |
| 12 | enum class SystemBootMode : u32 { Normal = 0, Maintenance = 1 }; | 12 | |
| 13 | enum class SystemBootMode { | ||
| 14 | Normal, | ||
| 15 | Maintenance, | ||
| 16 | }; | ||
| 17 | |||
| 13 | /// Registers all PM services with the specified service manager. | 18 | /// Registers all PM services with the specified service manager. |
| 14 | void InstallInterfaces(SM::ServiceManager& service_manager); | 19 | void InstallInterfaces(SM::ServiceManager& service_manager); |
| 15 | 20 | ||
diff --git a/src/core/hle/service/psc/psc.cpp b/src/core/hle/service/psc/psc.cpp index 0ba0a4076..53ec6b031 100644 --- a/src/core/hle/service/psc/psc.cpp +++ b/src/core/hle/service/psc/psc.cpp | |||
| @@ -17,13 +17,13 @@ public: | |||
| 17 | explicit PSC_C() : ServiceFramework{"psc:c"} { | 17 | explicit PSC_C() : ServiceFramework{"psc:c"} { |
| 18 | // clang-format off | 18 | // clang-format off |
| 19 | static const FunctionInfo functions[] = { | 19 | static const FunctionInfo functions[] = { |
| 20 | {0, nullptr, "Unknown1"}, | 20 | {0, nullptr, "Initialize"}, |
| 21 | {1, nullptr, "Unknown2"}, | 21 | {1, nullptr, "DispatchRequest"}, |
| 22 | {2, nullptr, "Unknown3"}, | 22 | {2, nullptr, "GetResult"}, |
| 23 | {3, nullptr, "Unknown4"}, | 23 | {3, nullptr, "GetState"}, |
| 24 | {4, nullptr, "Unknown5"}, | 24 | {4, nullptr, "Cancel"}, |
| 25 | {5, nullptr, "Unknown6"}, | 25 | {5, nullptr, "PrintModuleInformation"}, |
| 26 | {6, nullptr, "Unknown7"}, | 26 | {6, nullptr, "GetModuleInformation"}, |
| 27 | }; | 27 | }; |
| 28 | // clang-format on | 28 | // clang-format on |
| 29 | 29 | ||
| @@ -39,7 +39,8 @@ public: | |||
| 39 | {0, nullptr, "Initialize"}, | 39 | {0, nullptr, "Initialize"}, |
| 40 | {1, nullptr, "GetRequest"}, | 40 | {1, nullptr, "GetRequest"}, |
| 41 | {2, nullptr, "Acknowledge"}, | 41 | {2, nullptr, "Acknowledge"}, |
| 42 | {3, nullptr, "Unknown1"}, | 42 | {3, nullptr, "Finalize"}, |
| 43 | {4, nullptr, "AcknowledgeEx"}, | ||
| 43 | }; | 44 | }; |
| 44 | // clang-format on | 45 | // clang-format on |
| 45 | 46 | ||
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index 70c933934..a317a2885 100644 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp | |||
| @@ -34,6 +34,7 @@ namespace Service::VI { | |||
| 34 | 34 | ||
| 35 | constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::VI, 1}; | 35 | constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::VI, 1}; |
| 36 | constexpr ResultCode ERR_UNSUPPORTED{ErrorModule::VI, 6}; | 36 | constexpr ResultCode ERR_UNSUPPORTED{ErrorModule::VI, 6}; |
| 37 | constexpr ResultCode ERR_NOT_FOUND{ErrorModule::VI, 7}; | ||
| 37 | 38 | ||
| 38 | struct DisplayInfo { | 39 | struct DisplayInfo { |
| 39 | /// The name of this particular display. | 40 | /// The name of this particular display. |
| @@ -524,7 +525,7 @@ private: | |||
| 524 | LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id, | 525 | LOG_DEBUG(Service_VI, "called. id=0x{:08X} transaction={:X}, flags=0x{:08X}", id, |
| 525 | static_cast<u32>(transaction), flags); | 526 | static_cast<u32>(transaction), flags); |
| 526 | 527 | ||
| 527 | auto buffer_queue = nv_flinger->GetBufferQueue(id); | 528 | auto buffer_queue = nv_flinger->FindBufferQueue(id); |
| 528 | 529 | ||
| 529 | if (transaction == TransactionId::Connect) { | 530 | if (transaction == TransactionId::Connect) { |
| 530 | IGBPConnectRequestParcel request{ctx.ReadBuffer()}; | 531 | IGBPConnectRequestParcel request{ctx.ReadBuffer()}; |
| @@ -558,7 +559,7 @@ private: | |||
| 558 | [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx, | 559 | [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx, |
| 559 | Kernel::ThreadWakeupReason reason) { | 560 | Kernel::ThreadWakeupReason reason) { |
| 560 | // Repeat TransactParcel DequeueBuffer when a buffer is available | 561 | // Repeat TransactParcel DequeueBuffer when a buffer is available |
| 561 | auto buffer_queue = nv_flinger->GetBufferQueue(id); | 562 | auto buffer_queue = nv_flinger->FindBufferQueue(id); |
| 562 | std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height); | 563 | std::optional<u32> slot = buffer_queue->DequeueBuffer(width, height); |
| 563 | ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer."); | 564 | ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer."); |
| 564 | 565 | ||
| @@ -628,7 +629,7 @@ private: | |||
| 628 | 629 | ||
| 629 | LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown); | 630 | LOG_WARNING(Service_VI, "(STUBBED) called id={}, unknown={:08X}", id, unknown); |
| 630 | 631 | ||
| 631 | const auto buffer_queue = nv_flinger->GetBufferQueue(id); | 632 | const auto buffer_queue = nv_flinger->FindBufferQueue(id); |
| 632 | 633 | ||
| 633 | // TODO(Subv): Find out what this actually is. | 634 | // TODO(Subv): Find out what this actually is. |
| 634 | IPC::ResponseBuilder rb{ctx, 2, 1}; | 635 | IPC::ResponseBuilder rb{ctx, 2, 1}; |
| @@ -704,13 +705,14 @@ private: | |||
| 704 | rb.Push(RESULT_SUCCESS); | 705 | rb.Push(RESULT_SUCCESS); |
| 705 | } | 706 | } |
| 706 | 707 | ||
| 708 | // This function currently does nothing but return a success error code in | ||
| 709 | // the vi library itself, so do the same thing, but log out the passed in values. | ||
| 707 | void SetLayerVisibility(Kernel::HLERequestContext& ctx) { | 710 | void SetLayerVisibility(Kernel::HLERequestContext& ctx) { |
| 708 | IPC::RequestParser rp{ctx}; | 711 | IPC::RequestParser rp{ctx}; |
| 709 | const u64 layer_id = rp.Pop<u64>(); | 712 | const u64 layer_id = rp.Pop<u64>(); |
| 710 | const bool visibility = rp.Pop<bool>(); | 713 | const bool visibility = rp.Pop<bool>(); |
| 711 | 714 | ||
| 712 | LOG_WARNING(Service_VI, "(STUBBED) called, layer_id=0x{:08X}, visibility={}", layer_id, | 715 | LOG_DEBUG(Service_VI, "called, layer_id=0x{:08X}, visibility={}", layer_id, visibility); |
| 713 | visibility); | ||
| 714 | 716 | ||
| 715 | IPC::ResponseBuilder rb{ctx, 2}; | 717 | IPC::ResponseBuilder rb{ctx, 2}; |
| 716 | rb.Push(RESULT_SUCCESS); | 718 | rb.Push(RESULT_SUCCESS); |
| @@ -837,11 +839,16 @@ private: | |||
| 837 | "(STUBBED) called. unknown=0x{:08X}, display=0x{:016X}, aruid=0x{:016X}", | 839 | "(STUBBED) called. unknown=0x{:08X}, display=0x{:016X}, aruid=0x{:016X}", |
| 838 | unknown, display, aruid); | 840 | unknown, display, aruid); |
| 839 | 841 | ||
| 840 | const u64 layer_id = nv_flinger->CreateLayer(display); | 842 | const auto layer_id = nv_flinger->CreateLayer(display); |
| 843 | if (!layer_id) { | ||
| 844 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 845 | rb.Push(ERR_NOT_FOUND); | ||
| 846 | return; | ||
| 847 | } | ||
| 841 | 848 | ||
| 842 | IPC::ResponseBuilder rb{ctx, 4}; | 849 | IPC::ResponseBuilder rb{ctx, 4}; |
| 843 | rb.Push(RESULT_SUCCESS); | 850 | rb.Push(RESULT_SUCCESS); |
| 844 | rb.Push(layer_id); | 851 | rb.Push(*layer_id); |
| 845 | } | 852 | } |
| 846 | 853 | ||
| 847 | void AddToLayerStack(Kernel::HLERequestContext& ctx) { | 854 | void AddToLayerStack(Kernel::HLERequestContext& ctx) { |
| @@ -949,9 +956,16 @@ private: | |||
| 949 | 956 | ||
| 950 | ASSERT_MSG(name == "Default", "Non-default displays aren't supported yet"); | 957 | ASSERT_MSG(name == "Default", "Non-default displays aren't supported yet"); |
| 951 | 958 | ||
| 959 | const auto display_id = nv_flinger->OpenDisplay(name); | ||
| 960 | if (!display_id) { | ||
| 961 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 962 | rb.Push(ERR_NOT_FOUND); | ||
| 963 | return; | ||
| 964 | } | ||
| 965 | |||
| 952 | IPC::ResponseBuilder rb{ctx, 4}; | 966 | IPC::ResponseBuilder rb{ctx, 4}; |
| 953 | rb.Push(RESULT_SUCCESS); | 967 | rb.Push(RESULT_SUCCESS); |
| 954 | rb.Push<u64>(nv_flinger->OpenDisplay(name)); | 968 | rb.Push<u64>(*display_id); |
| 955 | } | 969 | } |
| 956 | 970 | ||
| 957 | void CloseDisplay(Kernel::HLERequestContext& ctx) { | 971 | void CloseDisplay(Kernel::HLERequestContext& ctx) { |
| @@ -1042,10 +1056,21 @@ private: | |||
| 1042 | 1056 | ||
| 1043 | LOG_DEBUG(Service_VI, "called. layer_id=0x{:016X}, aruid=0x{:016X}", layer_id, aruid); | 1057 | LOG_DEBUG(Service_VI, "called. layer_id=0x{:016X}, aruid=0x{:016X}", layer_id, aruid); |
| 1044 | 1058 | ||
| 1045 | const u64 display_id = nv_flinger->OpenDisplay(display_name); | 1059 | const auto display_id = nv_flinger->OpenDisplay(display_name); |
| 1046 | const u32 buffer_queue_id = nv_flinger->GetBufferQueueId(display_id, layer_id); | 1060 | if (!display_id) { |
| 1061 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 1062 | rb.Push(ERR_NOT_FOUND); | ||
| 1063 | return; | ||
| 1064 | } | ||
| 1047 | 1065 | ||
| 1048 | NativeWindow native_window{buffer_queue_id}; | 1066 | const auto buffer_queue_id = nv_flinger->FindBufferQueueId(*display_id, layer_id); |
| 1067 | if (!buffer_queue_id) { | ||
| 1068 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 1069 | rb.Push(ERR_NOT_FOUND); | ||
| 1070 | return; | ||
| 1071 | } | ||
| 1072 | |||
| 1073 | NativeWindow native_window{*buffer_queue_id}; | ||
| 1049 | IPC::ResponseBuilder rb{ctx, 4}; | 1074 | IPC::ResponseBuilder rb{ctx, 4}; |
| 1050 | rb.Push(RESULT_SUCCESS); | 1075 | rb.Push(RESULT_SUCCESS); |
| 1051 | rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize())); | 1076 | rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize())); |
| @@ -1061,13 +1086,24 @@ private: | |||
| 1061 | 1086 | ||
| 1062 | // TODO(Subv): What's the difference between a Stray and a Managed layer? | 1087 | // TODO(Subv): What's the difference between a Stray and a Managed layer? |
| 1063 | 1088 | ||
| 1064 | const u64 layer_id = nv_flinger->CreateLayer(display_id); | 1089 | const auto layer_id = nv_flinger->CreateLayer(display_id); |
| 1065 | const u32 buffer_queue_id = nv_flinger->GetBufferQueueId(display_id, layer_id); | 1090 | if (!layer_id) { |
| 1091 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 1092 | rb.Push(ERR_NOT_FOUND); | ||
| 1093 | return; | ||
| 1094 | } | ||
| 1095 | |||
| 1096 | const auto buffer_queue_id = nv_flinger->FindBufferQueueId(display_id, *layer_id); | ||
| 1097 | if (!buffer_queue_id) { | ||
| 1098 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 1099 | rb.Push(ERR_NOT_FOUND); | ||
| 1100 | return; | ||
| 1101 | } | ||
| 1066 | 1102 | ||
| 1067 | NativeWindow native_window{buffer_queue_id}; | 1103 | NativeWindow native_window{*buffer_queue_id}; |
| 1068 | IPC::ResponseBuilder rb{ctx, 6}; | 1104 | IPC::ResponseBuilder rb{ctx, 6}; |
| 1069 | rb.Push(RESULT_SUCCESS); | 1105 | rb.Push(RESULT_SUCCESS); |
| 1070 | rb.Push(layer_id); | 1106 | rb.Push(*layer_id); |
| 1071 | rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize())); | 1107 | rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize())); |
| 1072 | } | 1108 | } |
| 1073 | 1109 | ||
| @@ -1087,7 +1123,12 @@ private: | |||
| 1087 | 1123 | ||
| 1088 | LOG_WARNING(Service_VI, "(STUBBED) called. display_id=0x{:016X}", display_id); | 1124 | LOG_WARNING(Service_VI, "(STUBBED) called. display_id=0x{:016X}", display_id); |
| 1089 | 1125 | ||
| 1090 | const auto vsync_event = nv_flinger->GetVsyncEvent(display_id); | 1126 | const auto vsync_event = nv_flinger->FindVsyncEvent(display_id); |
| 1127 | if (!vsync_event) { | ||
| 1128 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 1129 | rb.Push(ERR_NOT_FOUND); | ||
| 1130 | return; | ||
| 1131 | } | ||
| 1091 | 1132 | ||
| 1092 | IPC::ResponseBuilder rb{ctx, 2, 1}; | 1133 | IPC::ResponseBuilder rb{ctx, 2, 1}; |
| 1093 | rb.Push(RESULT_SUCCESS); | 1134 | rb.Push(RESULT_SUCCESS); |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 327db68a5..6113e17ff 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -59,6 +59,35 @@ add_library(video_core STATIC | |||
| 59 | renderer_opengl/renderer_opengl.h | 59 | renderer_opengl/renderer_opengl.h |
| 60 | renderer_opengl/utils.cpp | 60 | renderer_opengl/utils.cpp |
| 61 | renderer_opengl/utils.h | 61 | renderer_opengl/utils.h |
| 62 | shader/decode/arithmetic.cpp | ||
| 63 | shader/decode/arithmetic_immediate.cpp | ||
| 64 | shader/decode/bfe.cpp | ||
| 65 | shader/decode/bfi.cpp | ||
| 66 | shader/decode/shift.cpp | ||
| 67 | shader/decode/arithmetic_integer.cpp | ||
| 68 | shader/decode/arithmetic_integer_immediate.cpp | ||
| 69 | shader/decode/arithmetic_half.cpp | ||
| 70 | shader/decode/arithmetic_half_immediate.cpp | ||
| 71 | shader/decode/ffma.cpp | ||
| 72 | shader/decode/hfma2.cpp | ||
| 73 | shader/decode/conversion.cpp | ||
| 74 | shader/decode/memory.cpp | ||
| 75 | shader/decode/float_set_predicate.cpp | ||
| 76 | shader/decode/integer_set_predicate.cpp | ||
| 77 | shader/decode/half_set_predicate.cpp | ||
| 78 | shader/decode/predicate_set_register.cpp | ||
| 79 | shader/decode/predicate_set_predicate.cpp | ||
| 80 | shader/decode/register_set_predicate.cpp | ||
| 81 | shader/decode/float_set.cpp | ||
| 82 | shader/decode/integer_set.cpp | ||
| 83 | shader/decode/half_set.cpp | ||
| 84 | shader/decode/video.cpp | ||
| 85 | shader/decode/xmad.cpp | ||
| 86 | shader/decode/other.cpp | ||
| 87 | shader/decode.cpp | ||
| 88 | shader/shader_ir.cpp | ||
| 89 | shader/shader_ir.h | ||
| 90 | shader/track.cpp | ||
| 62 | surface.cpp | 91 | surface.cpp |
| 63 | surface.h | 92 | surface.h |
| 64 | textures/astc.cpp | 93 | textures/astc.cpp |
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 16e0697c4..1097e5c49 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h | |||
| @@ -83,7 +83,7 @@ private: | |||
| 83 | u32 subchannel; ///< Current subchannel | 83 | u32 subchannel; ///< Current subchannel |
| 84 | u32 method_count; ///< Current method count | 84 | u32 method_count; ///< Current method count |
| 85 | u32 length_pending; ///< Large NI command length pending | 85 | u32 length_pending; ///< Large NI command length pending |
| 86 | bool non_incrementing; ///< Current command’s NI flag | 86 | bool non_incrementing; ///< Current command's NI flag |
| 87 | }; | 87 | }; |
| 88 | 88 | ||
| 89 | DmaState dma_state{}; | 89 | DmaState dma_state{}; |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index e53c77f2b..269df9437 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -208,6 +208,8 @@ enum class UniformType : u64 { | |||
| 208 | SignedShort = 3, | 208 | SignedShort = 3, |
| 209 | Single = 4, | 209 | Single = 4, |
| 210 | Double = 5, | 210 | Double = 5, |
| 211 | Quad = 6, | ||
| 212 | UnsignedQuad = 7, | ||
| 211 | }; | 213 | }; |
| 212 | 214 | ||
| 213 | enum class StoreType : u64 { | 215 | enum class StoreType : u64 { |
| @@ -215,9 +217,9 @@ enum class StoreType : u64 { | |||
| 215 | Signed8 = 1, | 217 | Signed8 = 1, |
| 216 | Unsigned16 = 2, | 218 | Unsigned16 = 2, |
| 217 | Signed16 = 3, | 219 | Signed16 = 3, |
| 218 | Bytes32 = 4, | 220 | Bits32 = 4, |
| 219 | Bytes64 = 5, | 221 | Bits64 = 5, |
| 220 | Bytes128 = 6, | 222 | Bits128 = 6, |
| 221 | }; | 223 | }; |
| 222 | 224 | ||
| 223 | enum class IMinMaxExchange : u64 { | 225 | enum class IMinMaxExchange : u64 { |
| @@ -397,6 +399,10 @@ struct IpaMode { | |||
| 397 | bool operator!=(const IpaMode& a) const { | 399 | bool operator!=(const IpaMode& a) const { |
| 398 | return !operator==(a); | 400 | return !operator==(a); |
| 399 | } | 401 | } |
| 402 | bool operator<(const IpaMode& a) const { | ||
| 403 | return std::tie(interpolation_mode, sampling_mode) < | ||
| 404 | std::tie(a.interpolation_mode, a.sampling_mode); | ||
| 405 | } | ||
| 400 | }; | 406 | }; |
| 401 | 407 | ||
| 402 | enum class SystemVariable : u64 { | 408 | enum class SystemVariable : u64 { |
| @@ -644,6 +650,7 @@ union Instruction { | |||
| 644 | BitField<37, 2, HalfPrecision> precision; | 650 | BitField<37, 2, HalfPrecision> precision; |
| 645 | BitField<32, 1, u64> saturate; | 651 | BitField<32, 1, u64> saturate; |
| 646 | 652 | ||
| 653 | BitField<31, 1, u64> negate_b; | ||
| 647 | BitField<30, 1, u64> negate_c; | 654 | BitField<30, 1, u64> negate_c; |
| 648 | BitField<35, 2, HalfType> type_c; | 655 | BitField<35, 2, HalfType> type_c; |
| 649 | } rr; | 656 | } rr; |
| @@ -780,6 +787,12 @@ union Instruction { | |||
| 780 | } st_l; | 787 | } st_l; |
| 781 | 788 | ||
| 782 | union { | 789 | union { |
| 790 | BitField<48, 3, UniformType> type; | ||
| 791 | BitField<46, 2, u64> cache_mode; | ||
| 792 | BitField<20, 24, s64> immediate_offset; | ||
| 793 | } ldg; | ||
| 794 | |||
| 795 | union { | ||
| 783 | BitField<0, 3, u64> pred0; | 796 | BitField<0, 3, u64> pred0; |
| 784 | BitField<3, 3, u64> pred3; | 797 | BitField<3, 3, u64> pred3; |
| 785 | BitField<7, 1, u64> abs_a; | 798 | BitField<7, 1, u64> abs_a; |
| @@ -968,6 +981,10 @@ union Instruction { | |||
| 968 | } | 981 | } |
| 969 | return false; | 982 | return false; |
| 970 | } | 983 | } |
| 984 | |||
| 985 | bool IsComponentEnabled(std::size_t component) const { | ||
| 986 | return ((1ULL << component) & component_mask) != 0; | ||
| 987 | } | ||
| 971 | } txq; | 988 | } txq; |
| 972 | 989 | ||
| 973 | union { | 990 | union { |
| @@ -1235,11 +1252,19 @@ union Instruction { | |||
| 1235 | union { | 1252 | union { |
| 1236 | BitField<20, 14, u64> offset; | 1253 | BitField<20, 14, u64> offset; |
| 1237 | BitField<34, 5, u64> index; | 1254 | BitField<34, 5, u64> index; |
| 1255 | |||
| 1256 | u64 GetOffset() const { | ||
| 1257 | return offset * 4; | ||
| 1258 | } | ||
| 1238 | } cbuf34; | 1259 | } cbuf34; |
| 1239 | 1260 | ||
| 1240 | union { | 1261 | union { |
| 1241 | BitField<20, 16, s64> offset; | 1262 | BitField<20, 16, s64> offset; |
| 1242 | BitField<36, 5, u64> index; | 1263 | BitField<36, 5, u64> index; |
| 1264 | |||
| 1265 | s64 GetOffset() const { | ||
| 1266 | return offset; | ||
| 1267 | } | ||
| 1243 | } cbuf36; | 1268 | } cbuf36; |
| 1244 | 1269 | ||
| 1245 | // Unsure about the size of this one. | 1270 | // Unsure about the size of this one. |
| @@ -1431,6 +1456,7 @@ public: | |||
| 1431 | PredicateSetRegister, | 1456 | PredicateSetRegister, |
| 1432 | RegisterSetPredicate, | 1457 | RegisterSetPredicate, |
| 1433 | Conversion, | 1458 | Conversion, |
| 1459 | Video, | ||
| 1434 | Xmad, | 1460 | Xmad, |
| 1435 | Unknown, | 1461 | Unknown, |
| 1436 | }; | 1462 | }; |
| @@ -1562,8 +1588,8 @@ private: | |||
| 1562 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), | 1588 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), |
| 1563 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), | 1589 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), |
| 1564 | INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), | 1590 | INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), |
| 1565 | INST("01011111--------", Id::VMAD, Type::Trivial, "VMAD"), | 1591 | INST("01011111--------", Id::VMAD, Type::Video, "VMAD"), |
| 1566 | INST("0101000011110---", Id::VSETP, Type::Trivial, "VSETP"), | 1592 | INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"), |
| 1567 | INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), | 1593 | INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), |
| 1568 | INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), | 1594 | INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), |
| 1569 | INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), | 1595 | INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), |
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h index 99c34649f..cf2b76ff6 100644 --- a/src/video_core/engines/shader_header.h +++ b/src/video_core/engines/shader_header.h | |||
| @@ -106,7 +106,7 @@ struct Header { | |||
| 106 | } ps; | 106 | } ps; |
| 107 | }; | 107 | }; |
| 108 | 108 | ||
| 109 | u64 GetLocalMemorySize() { | 109 | u64 GetLocalMemorySize() const { |
| 110 | return (common1.shader_local_memory_low_size | | 110 | return (common1.shader_local_memory_low_size | |
| 111 | (common2.shader_local_memory_high_size << 24)); | 111 | (common2.shader_local_memory_high_size << 24)); |
| 112 | } | 112 | } |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 08cf6268f..d3d32a359 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -3,6 +3,8 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "core/core_timing.h" | ||
| 7 | #include "core/memory.h" | ||
| 6 | #include "video_core/engines/fermi_2d.h" | 8 | #include "video_core/engines/fermi_2d.h" |
| 7 | #include "video_core/engines/kepler_memory.h" | 9 | #include "video_core/engines/kepler_memory.h" |
| 8 | #include "video_core/engines/maxwell_3d.h" | 10 | #include "video_core/engines/maxwell_3d.h" |
| @@ -124,9 +126,36 @@ u32 DepthFormatBytesPerPixel(DepthFormat format) { | |||
| 124 | } | 126 | } |
| 125 | } | 127 | } |
| 126 | 128 | ||
| 129 | // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence | ||
| 130 | // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. | ||
| 131 | // So the values you see in docs might be multiplied by 4. | ||
| 127 | enum class BufferMethods { | 132 | enum class BufferMethods { |
| 128 | BindObject = 0, | 133 | BindObject = 0x0, |
| 129 | CountBufferMethods = 0x40, | 134 | Nop = 0x2, |
| 135 | SemaphoreAddressHigh = 0x4, | ||
| 136 | SemaphoreAddressLow = 0x5, | ||
| 137 | SemaphoreSequence = 0x6, | ||
| 138 | SemaphoreTrigger = 0x7, | ||
| 139 | NotifyIntr = 0x8, | ||
| 140 | WrcacheFlush = 0x9, | ||
| 141 | Unk28 = 0xA, | ||
| 142 | Unk2c = 0xB, | ||
| 143 | RefCnt = 0x14, | ||
| 144 | SemaphoreAcquire = 0x1A, | ||
| 145 | SemaphoreRelease = 0x1B, | ||
| 146 | Unk70 = 0x1C, | ||
| 147 | Unk74 = 0x1D, | ||
| 148 | Unk78 = 0x1E, | ||
| 149 | Unk7c = 0x1F, | ||
| 150 | Yield = 0x20, | ||
| 151 | NonPullerMethods = 0x40, | ||
| 152 | }; | ||
| 153 | |||
| 154 | enum class GpuSemaphoreOperation { | ||
| 155 | AcquireEqual = 0x1, | ||
| 156 | WriteLong = 0x2, | ||
| 157 | AcquireGequal = 0x4, | ||
| 158 | AcquireMask = 0x8, | ||
| 130 | }; | 159 | }; |
| 131 | 160 | ||
| 132 | void GPU::CallMethod(const MethodCall& method_call) { | 161 | void GPU::CallMethod(const MethodCall& method_call) { |
| @@ -135,20 +164,78 @@ void GPU::CallMethod(const MethodCall& method_call) { | |||
| 135 | 164 | ||
| 136 | ASSERT(method_call.subchannel < bound_engines.size()); | 165 | ASSERT(method_call.subchannel < bound_engines.size()); |
| 137 | 166 | ||
| 138 | if (method_call.method == static_cast<u32>(BufferMethods::BindObject)) { | 167 | if (ExecuteMethodOnEngine(method_call)) { |
| 139 | // Bind the current subchannel to the desired engine id. | 168 | CallEngineMethod(method_call); |
| 140 | LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, | 169 | } else { |
| 141 | method_call.argument); | 170 | CallPullerMethod(method_call); |
| 142 | bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument); | ||
| 143 | return; | ||
| 144 | } | 171 | } |
| 172 | } | ||
| 173 | |||
| 174 | bool GPU::ExecuteMethodOnEngine(const MethodCall& method_call) { | ||
| 175 | const auto method = static_cast<BufferMethods>(method_call.method); | ||
| 176 | return method >= BufferMethods::NonPullerMethods; | ||
| 177 | } | ||
| 145 | 178 | ||
| 146 | if (method_call.method < static_cast<u32>(BufferMethods::CountBufferMethods)) { | 179 | void GPU::CallPullerMethod(const MethodCall& method_call) { |
| 147 | // TODO(Subv): Research and implement these methods. | 180 | regs.reg_array[method_call.method] = method_call.argument; |
| 148 | LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented"); | 181 | const auto method = static_cast<BufferMethods>(method_call.method); |
| 149 | return; | 182 | |
| 183 | switch (method) { | ||
| 184 | case BufferMethods::BindObject: { | ||
| 185 | ProcessBindMethod(method_call); | ||
| 186 | break; | ||
| 187 | } | ||
| 188 | case BufferMethods::Nop: | ||
| 189 | case BufferMethods::SemaphoreAddressHigh: | ||
| 190 | case BufferMethods::SemaphoreAddressLow: | ||
| 191 | case BufferMethods::SemaphoreSequence: | ||
| 192 | case BufferMethods::RefCnt: | ||
| 193 | break; | ||
| 194 | case BufferMethods::SemaphoreTrigger: { | ||
| 195 | ProcessSemaphoreTriggerMethod(); | ||
| 196 | break; | ||
| 197 | } | ||
| 198 | case BufferMethods::NotifyIntr: { | ||
| 199 | // TODO(Kmather73): Research and implement this method. | ||
| 200 | LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); | ||
| 201 | break; | ||
| 202 | } | ||
| 203 | case BufferMethods::WrcacheFlush: { | ||
| 204 | // TODO(Kmather73): Research and implement this method. | ||
| 205 | LOG_ERROR(HW_GPU, "Special puller engine method WrcacheFlush not implemented"); | ||
| 206 | break; | ||
| 207 | } | ||
| 208 | case BufferMethods::Unk28: { | ||
| 209 | // TODO(Kmather73): Research and implement this method. | ||
| 210 | LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); | ||
| 211 | break; | ||
| 212 | } | ||
| 213 | case BufferMethods::Unk2c: { | ||
| 214 | // TODO(Kmather73): Research and implement this method. | ||
| 215 | LOG_ERROR(HW_GPU, "Special puller engine method Unk2c not implemented"); | ||
| 216 | break; | ||
| 217 | } | ||
| 218 | case BufferMethods::SemaphoreAcquire: { | ||
| 219 | ProcessSemaphoreAcquire(); | ||
| 220 | break; | ||
| 150 | } | 221 | } |
| 222 | case BufferMethods::SemaphoreRelease: { | ||
| 223 | ProcessSemaphoreRelease(); | ||
| 224 | break; | ||
| 225 | } | ||
| 226 | case BufferMethods::Yield: { | ||
| 227 | // TODO(Kmather73): Research and implement this method. | ||
| 228 | LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented"); | ||
| 229 | break; | ||
| 230 | } | ||
| 231 | default: | ||
| 232 | LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", | ||
| 233 | static_cast<u32>(method)); | ||
| 234 | break; | ||
| 235 | } | ||
| 236 | } | ||
| 151 | 237 | ||
| 238 | void GPU::CallEngineMethod(const MethodCall& method_call) { | ||
| 152 | const EngineID engine = bound_engines[method_call.subchannel]; | 239 | const EngineID engine = bound_engines[method_call.subchannel]; |
| 153 | 240 | ||
| 154 | switch (engine) { | 241 | switch (engine) { |
| @@ -172,4 +259,76 @@ void GPU::CallMethod(const MethodCall& method_call) { | |||
| 172 | } | 259 | } |
| 173 | } | 260 | } |
| 174 | 261 | ||
| 262 | void GPU::ProcessBindMethod(const MethodCall& method_call) { | ||
| 263 | // Bind the current subchannel to the desired engine id. | ||
| 264 | LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, | ||
| 265 | method_call.argument); | ||
| 266 | bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument); | ||
| 267 | } | ||
| 268 | |||
| 269 | void GPU::ProcessSemaphoreTriggerMethod() { | ||
| 270 | const auto semaphoreOperationMask = 0xF; | ||
| 271 | const auto op = | ||
| 272 | static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask); | ||
| 273 | if (op == GpuSemaphoreOperation::WriteLong) { | ||
| 274 | auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); | ||
| 275 | struct Block { | ||
| 276 | u32 sequence; | ||
| 277 | u32 zeros = 0; | ||
| 278 | u64 timestamp; | ||
| 279 | }; | ||
| 280 | |||
| 281 | Block block{}; | ||
| 282 | block.sequence = regs.semaphore_sequence; | ||
| 283 | // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of | ||
| 284 | // CoreTiming | ||
| 285 | block.timestamp = CoreTiming::GetTicks(); | ||
| 286 | Memory::WriteBlock(*address, &block, sizeof(block)); | ||
| 287 | } else { | ||
| 288 | const auto address = | ||
| 289 | memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); | ||
| 290 | const u32 word = Memory::Read32(*address); | ||
| 291 | if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) || | ||
| 292 | (op == GpuSemaphoreOperation::AcquireGequal && | ||
| 293 | static_cast<s32>(word - regs.semaphore_sequence) > 0) || | ||
| 294 | (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) { | ||
| 295 | // Nothing to do in this case | ||
| 296 | } else { | ||
| 297 | regs.acquire_source = true; | ||
| 298 | regs.acquire_value = regs.semaphore_sequence; | ||
| 299 | if (op == GpuSemaphoreOperation::AcquireEqual) { | ||
| 300 | regs.acquire_active = true; | ||
| 301 | regs.acquire_mode = false; | ||
| 302 | } else if (op == GpuSemaphoreOperation::AcquireGequal) { | ||
| 303 | regs.acquire_active = true; | ||
| 304 | regs.acquire_mode = true; | ||
| 305 | } else if (op == GpuSemaphoreOperation::AcquireMask) { | ||
| 306 | // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with | ||
| 307 | // semaphore_sequence, gives a non-0 result | ||
| 308 | LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented"); | ||
| 309 | } else { | ||
| 310 | LOG_ERROR(HW_GPU, "Invalid semaphore operation"); | ||
| 311 | } | ||
| 312 | } | ||
| 313 | } | ||
| 314 | } | ||
| 315 | |||
| 316 | void GPU::ProcessSemaphoreRelease() { | ||
| 317 | const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); | ||
| 318 | Memory::Write32(*address, regs.semaphore_release); | ||
| 319 | } | ||
| 320 | |||
| 321 | void GPU::ProcessSemaphoreAcquire() { | ||
| 322 | const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); | ||
| 323 | const u32 word = Memory::Read32(*address); | ||
| 324 | const auto value = regs.semaphore_acquire; | ||
| 325 | if (word != value) { | ||
| 326 | regs.acquire_active = true; | ||
| 327 | regs.acquire_value = value; | ||
| 328 | // TODO(kemathe73) figure out how to do the acquire_timeout | ||
| 329 | regs.acquire_mode = false; | ||
| 330 | regs.acquire_source = false; | ||
| 331 | } | ||
| 332 | } | ||
| 333 | |||
| 175 | } // namespace Tegra | 334 | } // namespace Tegra |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index af5ccd1e9..fb8975811 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -156,6 +156,46 @@ public: | |||
| 156 | /// Returns a const reference to the GPU DMA pusher. | 156 | /// Returns a const reference to the GPU DMA pusher. |
| 157 | const Tegra::DmaPusher& DmaPusher() const; | 157 | const Tegra::DmaPusher& DmaPusher() const; |
| 158 | 158 | ||
| 159 | struct Regs { | ||
| 160 | static constexpr size_t NUM_REGS = 0x100; | ||
| 161 | |||
| 162 | union { | ||
| 163 | struct { | ||
| 164 | INSERT_PADDING_WORDS(0x4); | ||
| 165 | struct { | ||
| 166 | u32 address_high; | ||
| 167 | u32 address_low; | ||
| 168 | |||
| 169 | GPUVAddr SmaphoreAddress() const { | ||
| 170 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 171 | address_low); | ||
| 172 | } | ||
| 173 | } smaphore_address; | ||
| 174 | |||
| 175 | u32 semaphore_sequence; | ||
| 176 | u32 semaphore_trigger; | ||
| 177 | INSERT_PADDING_WORDS(0xC); | ||
| 178 | |||
| 179 | // The puser and the puller share the reference counter, the pusher only has read | ||
| 180 | // access | ||
| 181 | u32 reference_count; | ||
| 182 | INSERT_PADDING_WORDS(0x5); | ||
| 183 | |||
| 184 | u32 semaphore_acquire; | ||
| 185 | u32 semaphore_release; | ||
| 186 | INSERT_PADDING_WORDS(0xE4); | ||
| 187 | |||
| 188 | // Puller state | ||
| 189 | u32 acquire_mode; | ||
| 190 | u32 acquire_source; | ||
| 191 | u32 acquire_active; | ||
| 192 | u32 acquire_timeout; | ||
| 193 | u32 acquire_value; | ||
| 194 | }; | ||
| 195 | std::array<u32, NUM_REGS> reg_array; | ||
| 196 | }; | ||
| 197 | } regs{}; | ||
| 198 | |||
| 159 | private: | 199 | private: |
| 160 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; | 200 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; |
| 161 | std::unique_ptr<Tegra::MemoryManager> memory_manager; | 201 | std::unique_ptr<Tegra::MemoryManager> memory_manager; |
| @@ -173,6 +213,37 @@ private: | |||
| 173 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; | 213 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; |
| 174 | /// Inline memory engine | 214 | /// Inline memory engine |
| 175 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; | 215 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; |
| 216 | |||
| 217 | void ProcessBindMethod(const MethodCall& method_call); | ||
| 218 | void ProcessSemaphoreTriggerMethod(); | ||
| 219 | void ProcessSemaphoreRelease(); | ||
| 220 | void ProcessSemaphoreAcquire(); | ||
| 221 | |||
| 222 | // Calls a GPU puller method. | ||
| 223 | void CallPullerMethod(const MethodCall& method_call); | ||
| 224 | // Calls a GPU engine method. | ||
| 225 | void CallEngineMethod(const MethodCall& method_call); | ||
| 226 | // Determines where the method should be executed. | ||
| 227 | bool ExecuteMethodOnEngine(const MethodCall& method_call); | ||
| 176 | }; | 228 | }; |
| 177 | 229 | ||
| 230 | #define ASSERT_REG_POSITION(field_name, position) \ | ||
| 231 | static_assert(offsetof(GPU::Regs, field_name) == position * 4, \ | ||
| 232 | "Field " #field_name " has invalid position") | ||
| 233 | |||
| 234 | ASSERT_REG_POSITION(smaphore_address, 0x4); | ||
| 235 | ASSERT_REG_POSITION(semaphore_sequence, 0x6); | ||
| 236 | ASSERT_REG_POSITION(semaphore_trigger, 0x7); | ||
| 237 | ASSERT_REG_POSITION(reference_count, 0x14); | ||
| 238 | ASSERT_REG_POSITION(semaphore_acquire, 0x1A); | ||
| 239 | ASSERT_REG_POSITION(semaphore_release, 0x1B); | ||
| 240 | |||
| 241 | ASSERT_REG_POSITION(acquire_mode, 0x100); | ||
| 242 | ASSERT_REG_POSITION(acquire_source, 0x101); | ||
| 243 | ASSERT_REG_POSITION(acquire_active, 0x102); | ||
| 244 | ASSERT_REG_POSITION(acquire_timeout, 0x103); | ||
| 245 | ASSERT_REG_POSITION(acquire_value, 0x104); | ||
| 246 | |||
| 247 | #undef ASSERT_REG_POSITION | ||
| 248 | |||
| 178 | } // namespace Tegra | 249 | } // namespace Tegra |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index ff5310848..4c08bb148 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -49,11 +49,6 @@ public: | |||
| 49 | return false; | 49 | return false; |
| 50 | } | 50 | } |
| 51 | 51 | ||
| 52 | /// Attempt to use a faster method to fill a region | ||
| 53 | virtual bool AccelerateFill(const void* config) { | ||
| 54 | return false; | ||
| 55 | } | ||
| 56 | |||
| 57 | /// Attempt to use a faster method to display the framebuffer to screen | 52 | /// Attempt to use a faster method to display the framebuffer to screen |
| 58 | virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 53 | virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |
| 59 | u32 pixel_stride) { | 54 | u32 pixel_stride) { |
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp index 7992b82c4..c7f32feaa 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ b/src/video_core/renderer_opengl/gl_global_cache.cpp | |||
| @@ -4,8 +4,13 @@ | |||
| 4 | 4 | ||
| 5 | #include <glad/glad.h> | 5 | #include <glad/glad.h> |
| 6 | 6 | ||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/logging/log.h" | ||
| 9 | #include "core/core.h" | ||
| 10 | #include "core/memory.h" | ||
| 7 | #include "video_core/renderer_opengl/gl_global_cache.h" | 11 | #include "video_core/renderer_opengl/gl_global_cache.h" |
| 8 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 12 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 13 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 9 | #include "video_core/renderer_opengl/utils.h" | 14 | #include "video_core/renderer_opengl/utils.h" |
| 10 | 15 | ||
| 11 | namespace OpenGL { | 16 | namespace OpenGL { |
| @@ -18,7 +23,72 @@ CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{ | |||
| 18 | LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory"); | 23 | LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory"); |
| 19 | } | 24 | } |
| 20 | 25 | ||
| 26 | void CachedGlobalRegion::Reload(u32 size_) { | ||
| 27 | constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize); | ||
| 28 | |||
| 29 | size = size_; | ||
| 30 | if (size > max_size) { | ||
| 31 | size = max_size; | ||
| 32 | LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_, | ||
| 33 | max_size); | ||
| 34 | } | ||
| 35 | |||
| 36 | // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer | ||
| 37 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); | ||
| 38 | glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW); | ||
| 39 | } | ||
| 40 | |||
| 41 | GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const { | ||
| 42 | const auto search{reserve.find(addr)}; | ||
| 43 | if (search == reserve.end()) { | ||
| 44 | return {}; | ||
| 45 | } | ||
| 46 | return search->second; | ||
| 47 | } | ||
| 48 | |||
| 49 | GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) { | ||
| 50 | GlobalRegion region{TryGetReservedGlobalRegion(addr, size)}; | ||
| 51 | if (!region) { | ||
| 52 | // No reserved surface available, create a new one and reserve it | ||
| 53 | region = std::make_shared<CachedGlobalRegion>(addr, size); | ||
| 54 | ReserveGlobalRegion(region); | ||
| 55 | } | ||
| 56 | region->Reload(size); | ||
| 57 | return region; | ||
| 58 | } | ||
| 59 | |||
| 60 | void GlobalRegionCacheOpenGL::ReserveGlobalRegion(const GlobalRegion& region) { | ||
| 61 | reserve[region->GetAddr()] = region; | ||
| 62 | } | ||
| 63 | |||
| 21 | GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) | 64 | GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) |
| 22 | : RasterizerCache{rasterizer} {} | 65 | : RasterizerCache{rasterizer} {} |
| 23 | 66 | ||
| 67 | GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( | ||
| 68 | const GLShader::GlobalMemoryEntry& global_region, | ||
| 69 | Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) { | ||
| 70 | |||
| 71 | auto& gpu{Core::System::GetInstance().GPU()}; | ||
| 72 | const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]; | ||
| 73 | const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress( | ||
| 74 | cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset()); | ||
| 75 | ASSERT(cbuf_addr); | ||
| 76 | |||
| 77 | const auto actual_addr_gpu = Memory::Read64(*cbuf_addr); | ||
| 78 | const auto size = Memory::Read32(*cbuf_addr + 8); | ||
| 79 | const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu); | ||
| 80 | ASSERT(actual_addr); | ||
| 81 | |||
| 82 | // Look up global region in the cache based on address | ||
| 83 | GlobalRegion region = TryGet(*actual_addr); | ||
| 84 | |||
| 85 | if (!region) { | ||
| 86 | // No global region found - create a new one | ||
| 87 | region = GetUncachedGlobalRegion(*actual_addr, size); | ||
| 88 | Register(region); | ||
| 89 | } | ||
| 90 | |||
| 91 | return region; | ||
| 92 | } | ||
| 93 | |||
| 24 | } // namespace OpenGL | 94 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h index 406a735bc..37830bb7c 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.h +++ b/src/video_core/renderer_opengl/gl_global_cache.h | |||
| @@ -5,9 +5,13 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <unordered_map> | ||
| 9 | |||
| 8 | #include <glad/glad.h> | 10 | #include <glad/glad.h> |
| 9 | 11 | ||
| 12 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 11 | #include "video_core/rasterizer_cache.h" | 15 | #include "video_core/rasterizer_cache.h" |
| 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 16 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 13 | 17 | ||
| @@ -40,6 +44,9 @@ public: | |||
| 40 | return buffer.handle; | 44 | return buffer.handle; |
| 41 | } | 45 | } |
| 42 | 46 | ||
| 47 | /// Reloads the global region from guest memory | ||
| 48 | void Reload(u32 size_); | ||
| 49 | |||
| 43 | // TODO(Rodrigo): When global memory is written (STG), implement flushing | 50 | // TODO(Rodrigo): When global memory is written (STG), implement flushing |
| 44 | void Flush() override { | 51 | void Flush() override { |
| 45 | UNIMPLEMENTED(); | 52 | UNIMPLEMENTED(); |
| @@ -55,6 +62,17 @@ private: | |||
| 55 | class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> { | 62 | class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> { |
| 56 | public: | 63 | public: |
| 57 | explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer); | 64 | explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer); |
| 65 | |||
| 66 | /// Gets the current specified shader stage program | ||
| 67 | GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor, | ||
| 68 | Tegra::Engines::Maxwell3D::Regs::ShaderStage stage); | ||
| 69 | |||
| 70 | private: | ||
| 71 | GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const; | ||
| 72 | GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size); | ||
| 73 | void ReserveGlobalRegion(const GlobalRegion& region); | ||
| 74 | |||
| 75 | std::unordered_map<VAddr, GlobalRegion> reserve; | ||
| 58 | }; | 76 | }; |
| 59 | 77 | ||
| 60 | } // namespace OpenGL | 78 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 6600ad528..9f7c837d6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -297,10 +297,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 297 | MICROPROFILE_SCOPE(OpenGL_Shader); | 297 | MICROPROFILE_SCOPE(OpenGL_Shader); |
| 298 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | 298 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| 299 | 299 | ||
| 300 | // Next available bindpoints to use when uploading the const buffers and textures to the GLSL | 300 | BaseBindings base_bindings; |
| 301 | // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. | ||
| 302 | u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; | ||
| 303 | u32 current_texture_bindpoint = 0; | ||
| 304 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 301 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| 305 | 302 | ||
| 306 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 303 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| @@ -324,43 +321,35 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 324 | const GLintptr offset = buffer_cache.UploadHostMemory( | 321 | const GLintptr offset = buffer_cache.UploadHostMemory( |
| 325 | &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment)); | 322 | &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment)); |
| 326 | 323 | ||
| 327 | // Bind the buffer | 324 | // Bind the emulation info buffer |
| 328 | glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(stage), buffer_cache.GetHandle(), | 325 | glBindBufferRange(GL_UNIFORM_BUFFER, base_bindings.cbuf, buffer_cache.GetHandle(), offset, |
| 329 | offset, static_cast<GLsizeiptr>(sizeof(ubo))); | 326 | static_cast<GLsizeiptr>(sizeof(ubo))); |
| 330 | 327 | ||
| 331 | Shader shader{shader_cache.GetStageProgram(program)}; | 328 | Shader shader{shader_cache.GetStageProgram(program)}; |
| 329 | const auto [program_handle, next_bindings] = | ||
| 330 | shader->GetProgramHandle(primitive_mode, base_bindings); | ||
| 332 | 331 | ||
| 333 | switch (program) { | 332 | switch (program) { |
| 334 | case Maxwell::ShaderProgram::VertexA: | 333 | case Maxwell::ShaderProgram::VertexA: |
| 335 | case Maxwell::ShaderProgram::VertexB: { | 334 | case Maxwell::ShaderProgram::VertexB: |
| 336 | shader_program_manager->UseProgrammableVertexShader( | 335 | shader_program_manager->UseProgrammableVertexShader(program_handle); |
| 337 | shader->GetProgramHandle(primitive_mode)); | ||
| 338 | break; | 336 | break; |
| 339 | } | 337 | case Maxwell::ShaderProgram::Geometry: |
| 340 | case Maxwell::ShaderProgram::Geometry: { | 338 | shader_program_manager->UseProgrammableGeometryShader(program_handle); |
| 341 | shader_program_manager->UseProgrammableGeometryShader( | ||
| 342 | shader->GetProgramHandle(primitive_mode)); | ||
| 343 | break; | 339 | break; |
| 344 | } | 340 | case Maxwell::ShaderProgram::Fragment: |
| 345 | case Maxwell::ShaderProgram::Fragment: { | 341 | shader_program_manager->UseProgrammableFragmentShader(program_handle); |
| 346 | shader_program_manager->UseProgrammableFragmentShader( | ||
| 347 | shader->GetProgramHandle(primitive_mode)); | ||
| 348 | break; | 342 | break; |
| 349 | } | ||
| 350 | default: | 343 | default: |
| 351 | LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, | 344 | LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, |
| 352 | shader_config.enable.Value(), shader_config.offset); | 345 | shader_config.enable.Value(), shader_config.offset); |
| 353 | UNREACHABLE(); | 346 | UNREACHABLE(); |
| 354 | } | 347 | } |
| 355 | 348 | ||
| 356 | // Configure the const buffers for this shader stage. | 349 | const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); |
| 357 | current_constbuffer_bindpoint = | 350 | SetupConstBuffers(stage_enum, shader, program_handle, base_bindings); |
| 358 | SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode, | 351 | SetupGlobalRegions(stage_enum, shader, program_handle, base_bindings); |
| 359 | current_constbuffer_bindpoint); | 352 | SetupTextures(stage_enum, shader, program_handle, base_bindings); |
| 360 | |||
| 361 | // Configure the textures for this shader stage. | ||
| 362 | current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader, | ||
| 363 | primitive_mode, current_texture_bindpoint); | ||
| 364 | 353 | ||
| 365 | // Workaround for Intel drivers. | 354 | // Workaround for Intel drivers. |
| 366 | // When a clip distance is enabled but not set in the shader it crops parts of the screen | 355 | // When a clip distance is enabled but not set in the shader it crops parts of the screen |
| @@ -375,6 +364,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 375 | // VertexB was combined with VertexA, so we skip the VertexB iteration | 364 | // VertexB was combined with VertexA, so we skip the VertexB iteration |
| 376 | index++; | 365 | index++; |
| 377 | } | 366 | } |
| 367 | |||
| 368 | base_bindings = next_bindings; | ||
| 378 | } | 369 | } |
| 379 | 370 | ||
| 380 | SyncClipEnabled(clip_distances); | 371 | SyncClipEnabled(clip_distances); |
| @@ -486,9 +477,9 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | |||
| 486 | cached_pages.add({pages_interval, delta}); | 477 | cached_pages.add({pages_interval, delta}); |
| 487 | } | 478 | } |
| 488 | 479 | ||
| 489 | void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool using_color_fb, | 480 | std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( |
| 490 | bool using_depth_fb, bool preserve_contents, | 481 | OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents, |
| 491 | std::optional<std::size_t> single_color_target) { | 482 | std::optional<std::size_t> single_color_target) { |
| 492 | MICROPROFILE_SCOPE(OpenGL_Framebuffer); | 483 | MICROPROFILE_SCOPE(OpenGL_Framebuffer); |
| 493 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | 484 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| 494 | const auto& regs = gpu.regs; | 485 | const auto& regs = gpu.regs; |
| @@ -500,7 +491,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us | |||
| 500 | // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or | 491 | // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or |
| 501 | // single color targets). This is done because the guest registers may not change but the | 492 | // single color targets). This is done because the guest registers may not change but the |
| 502 | // host framebuffer may contain different attachments | 493 | // host framebuffer may contain different attachments |
| 503 | return; | 494 | return current_depth_stencil_usage; |
| 504 | } | 495 | } |
| 505 | current_framebuffer_config_state = fb_config_state; | 496 | current_framebuffer_config_state = fb_config_state; |
| 506 | 497 | ||
| @@ -570,12 +561,14 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us | |||
| 570 | depth_surface->MarkAsModified(true, res_cache); | 561 | depth_surface->MarkAsModified(true, res_cache); |
| 571 | 562 | ||
| 572 | fbkey.zeta = depth_surface->Texture().handle; | 563 | fbkey.zeta = depth_surface->Texture().handle; |
| 573 | fbkey.stencil_enable = regs.stencil_enable; | 564 | fbkey.stencil_enable = regs.stencil_enable && |
| 565 | depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil; | ||
| 574 | } | 566 | } |
| 575 | 567 | ||
| 576 | SetupCachedFramebuffer(fbkey, current_state); | 568 | SetupCachedFramebuffer(fbkey, current_state); |
| 577 | |||
| 578 | SyncViewport(current_state); | 569 | SyncViewport(current_state); |
| 570 | |||
| 571 | return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable}; | ||
| 579 | } | 572 | } |
| 580 | 573 | ||
| 581 | void RasterizerOpenGL::Clear() { | 574 | void RasterizerOpenGL::Clear() { |
| @@ -643,8 +636,8 @@ void RasterizerOpenGL::Clear() { | |||
| 643 | return; | 636 | return; |
| 644 | } | 637 | } |
| 645 | 638 | ||
| 646 | ConfigureFramebuffers(clear_state, use_color, use_depth || use_stencil, false, | 639 | const auto [clear_depth, clear_stencil] = ConfigureFramebuffers( |
| 647 | regs.clear_buffers.RT.Value()); | 640 | clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value()); |
| 648 | if (regs.clear_flags.scissor) { | 641 | if (regs.clear_flags.scissor) { |
| 649 | SyncScissorTest(clear_state); | 642 | SyncScissorTest(clear_state); |
| 650 | } | 643 | } |
| @@ -659,11 +652,11 @@ void RasterizerOpenGL::Clear() { | |||
| 659 | glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); | 652 | glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); |
| 660 | } | 653 | } |
| 661 | 654 | ||
| 662 | if (use_depth && use_stencil) { | 655 | if (clear_depth && clear_stencil) { |
| 663 | glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); | 656 | glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); |
| 664 | } else if (use_depth) { | 657 | } else if (clear_depth) { |
| 665 | glClearBufferfv(GL_DEPTH, 0, ®s.clear_depth); | 658 | glClearBufferfv(GL_DEPTH, 0, ®s.clear_depth); |
| 666 | } else if (use_stencil) { | 659 | } else if (clear_stencil) { |
| 667 | glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil); | 660 | glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil); |
| 668 | } | 661 | } |
| 669 | } | 662 | } |
| @@ -790,11 +783,6 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs | |||
| 790 | return true; | 783 | return true; |
| 791 | } | 784 | } |
| 792 | 785 | ||
| 793 | bool RasterizerOpenGL::AccelerateFill(const void* config) { | ||
| 794 | UNREACHABLE(); | ||
| 795 | return true; | ||
| 796 | } | ||
| 797 | |||
| 798 | bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | 786 | bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, |
| 799 | VAddr framebuffer_addr, u32 pixel_stride) { | 787 | VAddr framebuffer_addr, u32 pixel_stride) { |
| 800 | if (!framebuffer_addr) { | 788 | if (!framebuffer_addr) { |
| @@ -924,13 +912,14 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr | |||
| 924 | } | 912 | } |
| 925 | } | 913 | } |
| 926 | 914 | ||
| 927 | u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shader, | 915 | void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 928 | GLenum primitive_mode, u32 current_bindpoint) { | 916 | const Shader& shader, GLuint program_handle, |
| 917 | BaseBindings base_bindings) { | ||
| 929 | MICROPROFILE_SCOPE(OpenGL_UBO); | 918 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 930 | const auto& gpu = Core::System::GetInstance().GPU(); | 919 | const auto& gpu = Core::System::GetInstance().GPU(); |
| 931 | const auto& maxwell3d = gpu.Maxwell3D(); | 920 | const auto& maxwell3d = gpu.Maxwell3D(); |
| 932 | const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)]; | 921 | const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)]; |
| 933 | const auto& entries = shader->GetShaderEntries().const_buffer_entries; | 922 | const auto& entries = shader->GetShaderEntries().const_buffers; |
| 934 | 923 | ||
| 935 | constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers; | 924 | constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers; |
| 936 | std::array<GLuint, max_binds> bind_buffers; | 925 | std::array<GLuint, max_binds> bind_buffers; |
| @@ -965,7 +954,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad | |||
| 965 | } | 954 | } |
| 966 | } else { | 955 | } else { |
| 967 | // Buffer is accessed directly, upload just what we use | 956 | // Buffer is accessed directly, upload just what we use |
| 968 | size = used_buffer.GetSize() * sizeof(float); | 957 | size = used_buffer.GetSize(); |
| 969 | } | 958 | } |
| 970 | 959 | ||
| 971 | // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 | 960 | // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 |
| @@ -973,75 +962,73 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad | |||
| 973 | size = Common::AlignUp(size, sizeof(GLvec4)); | 962 | size = Common::AlignUp(size, sizeof(GLvec4)); |
| 974 | ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); | 963 | ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); |
| 975 | 964 | ||
| 976 | GLintptr const_buffer_offset = buffer_cache.UploadMemory( | 965 | const GLintptr const_buffer_offset = buffer_cache.UploadMemory( |
| 977 | buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment)); | 966 | buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment)); |
| 978 | 967 | ||
| 979 | // Now configure the bindpoint of the buffer inside the shader | ||
| 980 | glUniformBlockBinding(shader->GetProgramHandle(primitive_mode), | ||
| 981 | shader->GetProgramResourceIndex(used_buffer), | ||
| 982 | current_bindpoint + bindpoint); | ||
| 983 | |||
| 984 | // Prepare values for multibind | 968 | // Prepare values for multibind |
| 985 | bind_buffers[bindpoint] = buffer_cache.GetHandle(); | 969 | bind_buffers[bindpoint] = buffer_cache.GetHandle(); |
| 986 | bind_offsets[bindpoint] = const_buffer_offset; | 970 | bind_offsets[bindpoint] = const_buffer_offset; |
| 987 | bind_sizes[bindpoint] = size; | 971 | bind_sizes[bindpoint] = size; |
| 988 | } | 972 | } |
| 989 | 973 | ||
| 990 | glBindBuffersRange(GL_UNIFORM_BUFFER, current_bindpoint, static_cast<GLsizei>(entries.size()), | 974 | // The first binding is reserved for emulation values |
| 975 | const GLuint ubo_base_binding = base_bindings.cbuf + 1; | ||
| 976 | glBindBuffersRange(GL_UNIFORM_BUFFER, ubo_base_binding, static_cast<GLsizei>(entries.size()), | ||
| 991 | bind_buffers.data(), bind_offsets.data(), bind_sizes.data()); | 977 | bind_buffers.data(), bind_offsets.data(), bind_sizes.data()); |
| 978 | } | ||
| 979 | |||
| 980 | void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | ||
| 981 | const Shader& shader, GLenum primitive_mode, | ||
| 982 | BaseBindings base_bindings) { | ||
| 983 | // TODO(Rodrigo): Use ARB_multi_bind here | ||
| 984 | const auto& entries = shader->GetShaderEntries().global_memory_entries; | ||
| 992 | 985 | ||
| 993 | return current_bindpoint + static_cast<u32>(entries.size()); | 986 | for (u32 bindpoint = 0; bindpoint < static_cast<u32>(entries.size()); ++bindpoint) { |
| 987 | const auto& entry = entries[bindpoint]; | ||
| 988 | const u32 current_bindpoint = base_bindings.gmem + bindpoint; | ||
| 989 | const auto& region = global_cache.GetGlobalRegion(entry, stage); | ||
| 990 | |||
| 991 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, current_bindpoint, region->GetBufferHandle()); | ||
| 992 | } | ||
| 994 | } | 993 | } |
| 995 | 994 | ||
| 996 | u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, | 995 | void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, |
| 997 | GLenum primitive_mode, u32 current_unit) { | 996 | GLuint program_handle, BaseBindings base_bindings) { |
| 998 | MICROPROFILE_SCOPE(OpenGL_Texture); | 997 | MICROPROFILE_SCOPE(OpenGL_Texture); |
| 999 | const auto& gpu = Core::System::GetInstance().GPU(); | 998 | const auto& gpu = Core::System::GetInstance().GPU(); |
| 1000 | const auto& maxwell3d = gpu.Maxwell3D(); | 999 | const auto& maxwell3d = gpu.Maxwell3D(); |
| 1001 | const auto& entries = shader->GetShaderEntries().texture_samplers; | 1000 | const auto& entries = shader->GetShaderEntries().samplers; |
| 1002 | 1001 | ||
| 1003 | ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units), | 1002 | ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units), |
| 1004 | "Exceeded the number of active textures."); | 1003 | "Exceeded the number of active textures."); |
| 1005 | 1004 | ||
| 1006 | for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { | 1005 | for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { |
| 1007 | const auto& entry = entries[bindpoint]; | 1006 | const auto& entry = entries[bindpoint]; |
| 1008 | const u32 current_bindpoint = current_unit + bindpoint; | 1007 | const u32 current_bindpoint = base_bindings.sampler + bindpoint; |
| 1009 | 1008 | auto& unit = state.texture_units[current_bindpoint]; | |
| 1010 | // Bind the uniform to the sampler. | ||
| 1011 | |||
| 1012 | glProgramUniform1i(shader->GetProgramHandle(primitive_mode), | ||
| 1013 | shader->GetUniformLocation(entry), current_bindpoint); | ||
| 1014 | 1009 | ||
| 1015 | const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset()); | 1010 | const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset()); |
| 1016 | |||
| 1017 | if (!texture.enabled) { | 1011 | if (!texture.enabled) { |
| 1018 | state.texture_units[current_bindpoint].texture = 0; | 1012 | unit.texture = 0; |
| 1019 | continue; | 1013 | continue; |
| 1020 | } | 1014 | } |
| 1021 | 1015 | ||
| 1022 | texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); | 1016 | texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); |
| 1017 | |||
| 1023 | Surface surface = res_cache.GetTextureSurface(texture, entry); | 1018 | Surface surface = res_cache.GetTextureSurface(texture, entry); |
| 1024 | if (surface != nullptr) { | 1019 | if (surface != nullptr) { |
| 1025 | const GLuint handle = | 1020 | unit.texture = |
| 1026 | entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle; | 1021 | entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle; |
| 1027 | const GLenum target = entry.IsArray() ? surface->TargetLayer() : surface->Target(); | 1022 | unit.target = entry.IsArray() ? surface->TargetLayer() : surface->Target(); |
| 1028 | state.texture_units[current_bindpoint].texture = handle; | 1023 | unit.swizzle.r = MaxwellToGL::SwizzleSource(texture.tic.x_source); |
| 1029 | state.texture_units[current_bindpoint].target = target; | 1024 | unit.swizzle.g = MaxwellToGL::SwizzleSource(texture.tic.y_source); |
| 1030 | state.texture_units[current_bindpoint].swizzle.r = | 1025 | unit.swizzle.b = MaxwellToGL::SwizzleSource(texture.tic.z_source); |
| 1031 | MaxwellToGL::SwizzleSource(texture.tic.x_source); | 1026 | unit.swizzle.a = MaxwellToGL::SwizzleSource(texture.tic.w_source); |
| 1032 | state.texture_units[current_bindpoint].swizzle.g = | ||
| 1033 | MaxwellToGL::SwizzleSource(texture.tic.y_source); | ||
| 1034 | state.texture_units[current_bindpoint].swizzle.b = | ||
| 1035 | MaxwellToGL::SwizzleSource(texture.tic.z_source); | ||
| 1036 | state.texture_units[current_bindpoint].swizzle.a = | ||
| 1037 | MaxwellToGL::SwizzleSource(texture.tic.w_source); | ||
| 1038 | } else { | 1027 | } else { |
| 1039 | // Can occur when texture addr is null or its memory is unmapped/invalid | 1028 | // Can occur when texture addr is null or its memory is unmapped/invalid |
| 1040 | state.texture_units[current_bindpoint].texture = 0; | 1029 | unit.texture = 0; |
| 1041 | } | 1030 | } |
| 1042 | } | 1031 | } |
| 1043 | |||
| 1044 | return current_unit + static_cast<u32>(entries.size()); | ||
| 1045 | } | 1032 | } |
| 1046 | 1033 | ||
| 1047 | void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { | 1034 | void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 21c51f874..7f2bf0f8b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -56,7 +56,6 @@ public: | |||
| 56 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | 56 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
| 57 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 57 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| 58 | const Tegra::Engines::Fermi2D::Regs::Surface& dst) override; | 58 | const Tegra::Engines::Fermi2D::Regs::Surface& dst) override; |
| 59 | bool AccelerateFill(const void* config) override; | ||
| 60 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 59 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |
| 61 | u32 pixel_stride) override; | 60 | u32 pixel_stride) override; |
| 62 | bool AccelerateDrawBatch(bool is_indexed) override; | 61 | bool AccelerateDrawBatch(bool is_indexed) override; |
| @@ -122,30 +121,25 @@ private: | |||
| 122 | * @param using_depth_fb If true, configure the depth/stencil framebuffer. | 121 | * @param using_depth_fb If true, configure the depth/stencil framebuffer. |
| 123 | * @param preserve_contents If true, tries to preserve data from a previously used framebuffer. | 122 | * @param preserve_contents If true, tries to preserve data from a previously used framebuffer. |
| 124 | * @param single_color_target Specifies if a single color buffer target should be used. | 123 | * @param single_color_target Specifies if a single color buffer target should be used. |
| 124 | * @returns If depth (first) or stencil (second) are being stored in the bound zeta texture | ||
| 125 | * (requires using_depth_fb to be true) | ||
| 125 | */ | 126 | */ |
| 126 | void ConfigureFramebuffers(OpenGLState& current_state, bool use_color_fb = true, | 127 | std::pair<bool, bool> ConfigureFramebuffers( |
| 127 | bool using_depth_fb = true, bool preserve_contents = true, | 128 | OpenGLState& current_state, bool use_color_fb = true, bool using_depth_fb = true, |
| 128 | std::optional<std::size_t> single_color_target = {}); | 129 | bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); |
| 129 | 130 | ||
| 130 | /** | 131 | /// Configures the current constbuffers to use for the draw command. |
| 131 | * Configures the current constbuffers to use for the draw command. | 132 | void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, |
| 132 | * @param stage The shader stage to configure buffers for. | 133 | GLuint program_handle, BaseBindings base_bindings); |
| 133 | * @param shader The shader object that contains the specified stage. | ||
| 134 | * @param current_bindpoint The offset at which to start counting new buffer bindpoints. | ||
| 135 | * @returns The next available bindpoint for use in the next shader stage. | ||
| 136 | */ | ||
| 137 | u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader, | ||
| 138 | GLenum primitive_mode, u32 current_bindpoint); | ||
| 139 | 134 | ||
| 140 | /** | 135 | /// Configures the current global memory entries to use for the draw command. |
| 141 | * Configures the current textures to use for the draw command. | 136 | void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 142 | * @param stage The shader stage to configure textures for. | 137 | const Shader& shader, GLenum primitive_mode, |
| 143 | * @param shader The shader object that contains the specified stage. | 138 | BaseBindings base_bindings); |
| 144 | * @param current_unit The offset at which to start counting unused texture units. | 139 | |
| 145 | * @returns The next available bindpoint for use in the next shader stage. | 140 | /// Configures the current textures to use for the draw command. |
| 146 | */ | 141 | void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, |
| 147 | u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader, | 142 | GLuint program_handle, BaseBindings base_bindings); |
| 148 | GLenum primitive_mode, u32 current_unit); | ||
| 149 | 143 | ||
| 150 | /// Syncs the viewport and depth range to match the guest state | 144 | /// Syncs the viewport and depth range to match the guest state |
| 151 | void SyncViewport(OpenGLState& current_state); | 145 | void SyncViewport(OpenGLState& current_state); |
| @@ -221,6 +215,7 @@ private: | |||
| 221 | 215 | ||
| 222 | std::map<FramebufferCacheKey, OGLFramebuffer> framebuffer_cache; | 216 | std::map<FramebufferCacheKey, OGLFramebuffer> framebuffer_cache; |
| 223 | FramebufferConfigState current_framebuffer_config_state; | 217 | FramebufferConfigState current_framebuffer_config_state; |
| 218 | std::pair<bool, bool> current_depth_stencil_usage{}; | ||
| 224 | 219 | ||
| 225 | std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers; | 220 | std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers; |
| 226 | 221 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index a05b8b936..50286432d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -128,6 +128,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, | |||
| 128 | params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); | 128 | params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); |
| 129 | params.unaligned_height = config.tic.Height(); | 129 | params.unaligned_height = config.tic.Height(); |
| 130 | params.target = SurfaceTargetFromTextureType(config.tic.texture_type); | 130 | params.target = SurfaceTargetFromTextureType(config.tic.texture_type); |
| 131 | params.identity = SurfaceClass::Uploaded; | ||
| 131 | 132 | ||
| 132 | switch (params.target) { | 133 | switch (params.target) { |
| 133 | case SurfaceTarget::Texture1D: | 134 | case SurfaceTarget::Texture1D: |
| @@ -167,6 +168,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, | |||
| 167 | } | 168 | } |
| 168 | 169 | ||
| 169 | params.is_layered = SurfaceTargetIsLayered(params.target); | 170 | params.is_layered = SurfaceTargetIsLayered(params.target); |
| 171 | params.is_array = SurfaceTargetIsArray(params.target); | ||
| 170 | params.max_mip_level = config.tic.max_mip_level + 1; | 172 | params.max_mip_level = config.tic.max_mip_level + 1; |
| 171 | params.rt = {}; | 173 | params.rt = {}; |
| 172 | 174 | ||
| @@ -194,6 +196,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, | |||
| 194 | params.height = config.height; | 196 | params.height = config.height; |
| 195 | params.unaligned_height = config.height; | 197 | params.unaligned_height = config.height; |
| 196 | params.target = SurfaceTarget::Texture2D; | 198 | params.target = SurfaceTarget::Texture2D; |
| 199 | params.identity = SurfaceClass::RenderTarget; | ||
| 197 | params.depth = 1; | 200 | params.depth = 1; |
| 198 | params.max_mip_level = 1; | 201 | params.max_mip_level = 1; |
| 199 | params.is_layered = false; | 202 | params.is_layered = false; |
| @@ -229,6 +232,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, | |||
| 229 | params.height = zeta_height; | 232 | params.height = zeta_height; |
| 230 | params.unaligned_height = zeta_height; | 233 | params.unaligned_height = zeta_height; |
| 231 | params.target = SurfaceTarget::Texture2D; | 234 | params.target = SurfaceTarget::Texture2D; |
| 235 | params.identity = SurfaceClass::DepthBuffer; | ||
| 232 | params.depth = 1; | 236 | params.depth = 1; |
| 233 | params.max_mip_level = 1; | 237 | params.max_mip_level = 1; |
| 234 | params.is_layered = false; | 238 | params.is_layered = false; |
| @@ -257,6 +261,7 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, | |||
| 257 | params.height = config.height; | 261 | params.height = config.height; |
| 258 | params.unaligned_height = config.height; | 262 | params.unaligned_height = config.height; |
| 259 | params.target = SurfaceTarget::Texture2D; | 263 | params.target = SurfaceTarget::Texture2D; |
| 264 | params.identity = SurfaceClass::Copy; | ||
| 260 | params.depth = 1; | 265 | params.depth = 1; |
| 261 | params.max_mip_level = 1; | 266 | params.max_mip_level = 1; |
| 262 | params.rt = {}; | 267 | params.rt = {}; |
| @@ -574,8 +579,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params) | |||
| 574 | 579 | ||
| 575 | ApplyTextureDefaults(SurfaceTargetToGL(params.target), params.max_mip_level); | 580 | ApplyTextureDefaults(SurfaceTargetToGL(params.target), params.max_mip_level); |
| 576 | 581 | ||
| 577 | LabelGLObject(GL_TEXTURE, texture.handle, params.addr, | 582 | OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.addr, params.IdentityString()); |
| 578 | SurfaceParams::SurfaceTargetName(params.target)); | ||
| 579 | 583 | ||
| 580 | // Clamp size to mapped GPU memory region | 584 | // Clamp size to mapped GPU memory region |
| 581 | // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000 | 585 | // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000 |
| @@ -730,7 +734,6 @@ void CachedSurface::FlushGLBuffer() { | |||
| 730 | glPixelStorei(GL_PACK_ROW_LENGTH, 0); | 734 | glPixelStorei(GL_PACK_ROW_LENGTH, 0); |
| 731 | ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width, | 735 | ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width, |
| 732 | params.height); | 736 | params.height); |
| 733 | ASSERT(params.type != SurfaceType::Fill); | ||
| 734 | const u8* const texture_src_data = Memory::GetPointer(params.addr); | 737 | const u8* const texture_src_data = Memory::GetPointer(params.addr); |
| 735 | ASSERT(texture_src_data); | 738 | ASSERT(texture_src_data); |
| 736 | if (params.is_tiled) { | 739 | if (params.is_tiled) { |
| @@ -877,10 +880,13 @@ void CachedSurface::EnsureTextureView() { | |||
| 877 | UNIMPLEMENTED_IF(gl_is_compressed); | 880 | UNIMPLEMENTED_IF(gl_is_compressed); |
| 878 | 881 | ||
| 879 | const GLenum target{TargetLayer()}; | 882 | const GLenum target{TargetLayer()}; |
| 883 | const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u}; | ||
| 884 | constexpr GLuint min_layer = 0; | ||
| 885 | constexpr GLuint min_level = 0; | ||
| 880 | 886 | ||
| 881 | texture_view.Create(); | 887 | texture_view.Create(); |
| 882 | glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, 0, | 888 | glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, min_level, |
| 883 | params.max_mip_level, 0, 1); | 889 | params.max_mip_level, min_layer, num_layers); |
| 884 | 890 | ||
| 885 | OpenGLState cur_state = OpenGLState::GetCurState(); | 891 | OpenGLState cur_state = OpenGLState::GetCurState(); |
| 886 | const auto& old_tex = cur_state.texture_units[0]; | 892 | const auto& old_tex = cur_state.texture_units[0]; |
| @@ -897,9 +903,6 @@ void CachedSurface::EnsureTextureView() { | |||
| 897 | 903 | ||
| 898 | MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64)); | 904 | MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64)); |
| 899 | void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) { | 905 | void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) { |
| 900 | if (params.type == SurfaceType::Fill) | ||
| 901 | return; | ||
| 902 | |||
| 903 | MICROPROFILE_SCOPE(OpenGL_TextureUL); | 906 | MICROPROFILE_SCOPE(OpenGL_TextureUL); |
| 904 | 907 | ||
| 905 | for (u32 i = 0; i < params.max_mip_level; i++) | 908 | for (u32 i = 0; i < params.max_mip_level; i++) |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 37611c4fc..8d7d6722c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h | |||
| @@ -35,6 +35,14 @@ using PixelFormat = VideoCore::Surface::PixelFormat; | |||
| 35 | using ComponentType = VideoCore::Surface::ComponentType; | 35 | using ComponentType = VideoCore::Surface::ComponentType; |
| 36 | 36 | ||
| 37 | struct SurfaceParams { | 37 | struct SurfaceParams { |
| 38 | |||
| 39 | enum class SurfaceClass { | ||
| 40 | Uploaded, | ||
| 41 | RenderTarget, | ||
| 42 | DepthBuffer, | ||
| 43 | Copy, | ||
| 44 | }; | ||
| 45 | |||
| 38 | static std::string SurfaceTargetName(SurfaceTarget target) { | 46 | static std::string SurfaceTargetName(SurfaceTarget target) { |
| 39 | switch (target) { | 47 | switch (target) { |
| 40 | case SurfaceTarget::Texture1D: | 48 | case SurfaceTarget::Texture1D: |
| @@ -210,6 +218,48 @@ struct SurfaceParams { | |||
| 210 | /// Initializes parameters for caching, should be called after everything has been initialized | 218 | /// Initializes parameters for caching, should be called after everything has been initialized |
| 211 | void InitCacheParameters(Tegra::GPUVAddr gpu_addr); | 219 | void InitCacheParameters(Tegra::GPUVAddr gpu_addr); |
| 212 | 220 | ||
| 221 | std::string TargetName() const { | ||
| 222 | switch (target) { | ||
| 223 | case SurfaceTarget::Texture1D: | ||
| 224 | return "1D"; | ||
| 225 | case SurfaceTarget::Texture2D: | ||
| 226 | return "2D"; | ||
| 227 | case SurfaceTarget::Texture3D: | ||
| 228 | return "3D"; | ||
| 229 | case SurfaceTarget::Texture1DArray: | ||
| 230 | return "1DArray"; | ||
| 231 | case SurfaceTarget::Texture2DArray: | ||
| 232 | return "2DArray"; | ||
| 233 | case SurfaceTarget::TextureCubemap: | ||
| 234 | return "Cube"; | ||
| 235 | default: | ||
| 236 | LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target)); | ||
| 237 | UNREACHABLE(); | ||
| 238 | return fmt::format("TUK({})", static_cast<u32>(target)); | ||
| 239 | } | ||
| 240 | } | ||
| 241 | |||
| 242 | std::string ClassName() const { | ||
| 243 | switch (identity) { | ||
| 244 | case SurfaceClass::Uploaded: | ||
| 245 | return "UP"; | ||
| 246 | case SurfaceClass::RenderTarget: | ||
| 247 | return "RT"; | ||
| 248 | case SurfaceClass::DepthBuffer: | ||
| 249 | return "DB"; | ||
| 250 | case SurfaceClass::Copy: | ||
| 251 | return "CP"; | ||
| 252 | default: | ||
| 253 | LOG_CRITICAL(HW_GPU, "Unimplemented surface_class={}", static_cast<u32>(identity)); | ||
| 254 | UNREACHABLE(); | ||
| 255 | return fmt::format("CUK({})", static_cast<u32>(identity)); | ||
| 256 | } | ||
| 257 | } | ||
| 258 | |||
| 259 | std::string IdentityString() const { | ||
| 260 | return ClassName() + '_' + TargetName() + '_' + (is_tiled ? 'T' : 'L'); | ||
| 261 | } | ||
| 262 | |||
| 213 | bool is_tiled; | 263 | bool is_tiled; |
| 214 | u32 block_width; | 264 | u32 block_width; |
| 215 | u32 block_height; | 265 | u32 block_height; |
| @@ -223,8 +273,10 @@ struct SurfaceParams { | |||
| 223 | u32 depth; | 273 | u32 depth; |
| 224 | u32 unaligned_height; | 274 | u32 unaligned_height; |
| 225 | SurfaceTarget target; | 275 | SurfaceTarget target; |
| 276 | SurfaceClass identity; | ||
| 226 | u32 max_mip_level; | 277 | u32 max_mip_level; |
| 227 | bool is_layered; | 278 | bool is_layered; |
| 279 | bool is_array; | ||
| 228 | bool srgb_conversion; | 280 | bool srgb_conversion; |
| 229 | // Parameters used for caching | 281 | // Parameters used for caching |
| 230 | VAddr addr; | 282 | VAddr addr; |
| @@ -255,6 +307,7 @@ struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> { | |||
| 255 | static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) { | 307 | static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) { |
| 256 | SurfaceReserveKey res; | 308 | SurfaceReserveKey res; |
| 257 | res.state = params; | 309 | res.state = params; |
| 310 | res.state.identity = {}; // Ignore the origin of the texture | ||
| 258 | res.state.gpu_addr = {}; // Ignore GPU vaddr in caching | 311 | res.state.gpu_addr = {}; // Ignore GPU vaddr in caching |
| 259 | res.state.rt = {}; // Ignore rt config in caching | 312 | res.state.rt = {}; // Ignore rt config in caching |
| 260 | return res; | 313 | return res; |
| @@ -294,7 +347,7 @@ public: | |||
| 294 | } | 347 | } |
| 295 | 348 | ||
| 296 | const OGLTexture& TextureLayer() { | 349 | const OGLTexture& TextureLayer() { |
| 297 | if (params.is_layered) { | 350 | if (params.is_array) { |
| 298 | return Texture(); | 351 | return Texture(); |
| 299 | } | 352 | } |
| 300 | EnsureTextureView(); | 353 | EnsureTextureView(); |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c785fffa3..90eda7814 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -10,11 +10,15 @@ | |||
| 10 | #include "video_core/engines/maxwell_3d.h" | 10 | #include "video_core/engines/maxwell_3d.h" |
| 11 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 11 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 12 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 12 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 13 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 14 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 14 | #include "video_core/renderer_opengl/utils.h" | 15 | #include "video_core/renderer_opengl/utils.h" |
| 16 | #include "video_core/shader/shader_ir.h" | ||
| 15 | 17 | ||
| 16 | namespace OpenGL { | 18 | namespace OpenGL { |
| 17 | 19 | ||
| 20 | using VideoCommon::Shader::ProgramCode; | ||
| 21 | |||
| 18 | /// Gets the address for the specified shader stage program | 22 | /// Gets the address for the specified shader stage program |
| 19 | static VAddr GetShaderAddress(Maxwell::ShaderProgram program) { | 23 | static VAddr GetShaderAddress(Maxwell::ShaderProgram program) { |
| 20 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | 24 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| @@ -24,42 +28,31 @@ static VAddr GetShaderAddress(Maxwell::ShaderProgram program) { | |||
| 24 | } | 28 | } |
| 25 | 29 | ||
| 26 | /// Gets the shader program code from memory for the specified address | 30 | /// Gets the shader program code from memory for the specified address |
| 27 | static GLShader::ProgramCode GetShaderCode(VAddr addr) { | 31 | static ProgramCode GetShaderCode(VAddr addr) { |
| 28 | GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH); | 32 | ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); |
| 29 | Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64)); | 33 | Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64)); |
| 30 | return program_code; | 34 | return program_code; |
| 31 | } | 35 | } |
| 32 | 36 | ||
| 33 | /// Helper function to set shader uniform block bindings for a single shader stage | 37 | /// Gets the shader type from a Maxwell program type |
| 34 | static void SetShaderUniformBlockBinding(GLuint shader, const char* name, | 38 | constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) { |
| 35 | Maxwell::ShaderStage binding, std::size_t expected_size) { | 39 | switch (program_type) { |
| 36 | const GLuint ub_index = glGetUniformBlockIndex(shader, name); | 40 | case Maxwell::ShaderProgram::VertexA: |
| 37 | if (ub_index == GL_INVALID_INDEX) { | 41 | case Maxwell::ShaderProgram::VertexB: |
| 38 | return; | 42 | return GL_VERTEX_SHADER; |
| 43 | case Maxwell::ShaderProgram::Geometry: | ||
| 44 | return GL_GEOMETRY_SHADER; | ||
| 45 | case Maxwell::ShaderProgram::Fragment: | ||
| 46 | return GL_FRAGMENT_SHADER; | ||
| 47 | default: | ||
| 48 | return GL_NONE; | ||
| 39 | } | 49 | } |
| 40 | |||
| 41 | GLint ub_size = 0; | ||
| 42 | glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); | ||
| 43 | ASSERT_MSG(static_cast<std::size_t>(ub_size) == expected_size, | ||
| 44 | "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size); | ||
| 45 | glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); | ||
| 46 | } | ||
| 47 | |||
| 48 | /// Sets shader uniform block bindings for an entire shader program | ||
| 49 | static void SetShaderUniformBlockBindings(GLuint shader) { | ||
| 50 | SetShaderUniformBlockBinding(shader, "vs_config", Maxwell::ShaderStage::Vertex, | ||
| 51 | sizeof(GLShader::MaxwellUniformData)); | ||
| 52 | SetShaderUniformBlockBinding(shader, "gs_config", Maxwell::ShaderStage::Geometry, | ||
| 53 | sizeof(GLShader::MaxwellUniformData)); | ||
| 54 | SetShaderUniformBlockBinding(shader, "fs_config", Maxwell::ShaderStage::Fragment, | ||
| 55 | sizeof(GLShader::MaxwellUniformData)); | ||
| 56 | } | 50 | } |
| 57 | 51 | ||
| 58 | CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) | 52 | CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) |
| 59 | : addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} { | 53 | : addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} { |
| 60 | 54 | ||
| 61 | GLShader::ProgramResult program_result; | 55 | GLShader::ProgramResult program_result; |
| 62 | GLenum gl_type{}; | ||
| 63 | 56 | ||
| 64 | switch (program_type) { | 57 | switch (program_type) { |
| 65 | case Maxwell::ShaderProgram::VertexA: | 58 | case Maxwell::ShaderProgram::VertexA: |
| @@ -70,17 +63,14 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) | |||
| 70 | case Maxwell::ShaderProgram::VertexB: | 63 | case Maxwell::ShaderProgram::VertexB: |
| 71 | CalculateProperties(); | 64 | CalculateProperties(); |
| 72 | program_result = GLShader::GenerateVertexShader(setup); | 65 | program_result = GLShader::GenerateVertexShader(setup); |
| 73 | gl_type = GL_VERTEX_SHADER; | ||
| 74 | break; | 66 | break; |
| 75 | case Maxwell::ShaderProgram::Geometry: | 67 | case Maxwell::ShaderProgram::Geometry: |
| 76 | CalculateProperties(); | 68 | CalculateProperties(); |
| 77 | program_result = GLShader::GenerateGeometryShader(setup); | 69 | program_result = GLShader::GenerateGeometryShader(setup); |
| 78 | gl_type = GL_GEOMETRY_SHADER; | ||
| 79 | break; | 70 | break; |
| 80 | case Maxwell::ShaderProgram::Fragment: | 71 | case Maxwell::ShaderProgram::Fragment: |
| 81 | CalculateProperties(); | 72 | CalculateProperties(); |
| 82 | program_result = GLShader::GenerateFragmentShader(setup); | 73 | program_result = GLShader::GenerateFragmentShader(setup); |
| 83 | gl_type = GL_FRAGMENT_SHADER; | ||
| 84 | break; | 74 | break; |
| 85 | default: | 75 | default: |
| 86 | LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); | 76 | LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); |
| @@ -88,59 +78,105 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) | |||
| 88 | return; | 78 | return; |
| 89 | } | 79 | } |
| 90 | 80 | ||
| 81 | code = program_result.first; | ||
| 91 | entries = program_result.second; | 82 | entries = program_result.second; |
| 92 | shader_length = entries.shader_length; | 83 | shader_length = entries.shader_length; |
| 84 | } | ||
| 93 | 85 | ||
| 94 | if (program_type != Maxwell::ShaderProgram::Geometry) { | 86 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode, |
| 95 | OGLShader shader; | 87 | BaseBindings base_bindings) { |
| 96 | shader.Create(program_result.first.c_str(), gl_type); | 88 | GLuint handle{}; |
| 97 | program.Create(true, shader.handle); | 89 | if (program_type == Maxwell::ShaderProgram::Geometry) { |
| 98 | SetShaderUniformBlockBindings(program.handle); | 90 | handle = GetGeometryShader(primitive_mode, base_bindings); |
| 99 | LabelGLObject(GL_PROGRAM, program.handle, addr); | ||
| 100 | } else { | 91 | } else { |
| 101 | // Store shader's code to lazily build it on draw | 92 | const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings); |
| 102 | geometry_programs.code = program_result.first; | 93 | auto& program = entry->second; |
| 94 | if (is_cache_miss) { | ||
| 95 | std::string source = AllocateBindings(base_bindings); | ||
| 96 | source += code; | ||
| 97 | |||
| 98 | OGLShader shader; | ||
| 99 | shader.Create(source.c_str(), GetShaderType(program_type)); | ||
| 100 | program.Create(true, shader.handle); | ||
| 101 | LabelGLObject(GL_PROGRAM, program.handle, addr); | ||
| 102 | } | ||
| 103 | |||
| 104 | handle = program.handle; | ||
| 103 | } | 105 | } |
| 106 | |||
| 107 | // Add const buffer and samplers offset reserved by this shader. One UBO binding is reserved for | ||
| 108 | // emulation values | ||
| 109 | base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + 1; | ||
| 110 | base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); | ||
| 111 | base_bindings.sampler += static_cast<u32>(entries.samplers.size()); | ||
| 112 | |||
| 113 | return {handle, base_bindings}; | ||
| 104 | } | 114 | } |
| 105 | 115 | ||
| 106 | GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) { | 116 | std::string CachedShader::AllocateBindings(BaseBindings base_bindings) { |
| 107 | const auto search{resource_cache.find(buffer.GetHash())}; | 117 | std::string code = "#version 430 core\n"; |
| 108 | if (search == resource_cache.end()) { | 118 | code += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); |
| 109 | const GLuint index{ | 119 | |
| 110 | glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())}; | 120 | for (const auto& cbuf : entries.const_buffers) { |
| 111 | resource_cache[buffer.GetHash()] = index; | 121 | code += fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++); |
| 112 | return index; | ||
| 113 | } | 122 | } |
| 114 | 123 | ||
| 115 | return search->second; | 124 | for (const auto& gmem : entries.global_memory_entries) { |
| 116 | } | 125 | code += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(), |
| 126 | gmem.GetCbufOffset(), base_bindings.gmem++); | ||
| 127 | } | ||
| 117 | 128 | ||
| 118 | GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) { | 129 | for (const auto& sampler : entries.samplers) { |
| 119 | const auto search{uniform_cache.find(sampler.GetHash())}; | 130 | code += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(), |
| 120 | if (search == uniform_cache.end()) { | 131 | base_bindings.sampler++); |
| 121 | const GLint index{glGetUniformLocation(program.handle, sampler.GetName().c_str())}; | ||
| 122 | uniform_cache[sampler.GetHash()] = index; | ||
| 123 | return index; | ||
| 124 | } | 132 | } |
| 125 | 133 | ||
| 126 | return search->second; | 134 | return code; |
| 135 | } | ||
| 136 | |||
| 137 | GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) { | ||
| 138 | const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings); | ||
| 139 | auto& programs = entry->second; | ||
| 140 | |||
| 141 | switch (primitive_mode) { | ||
| 142 | case GL_POINTS: | ||
| 143 | return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints"); | ||
| 144 | case GL_LINES: | ||
| 145 | case GL_LINE_STRIP: | ||
| 146 | return LazyGeometryProgram(programs.lines, base_bindings, "lines", 2, "ShaderLines"); | ||
| 147 | case GL_LINES_ADJACENCY: | ||
| 148 | case GL_LINE_STRIP_ADJACENCY: | ||
| 149 | return LazyGeometryProgram(programs.lines_adjacency, base_bindings, "lines_adjacency", 4, | ||
| 150 | "ShaderLinesAdjacency"); | ||
| 151 | case GL_TRIANGLES: | ||
| 152 | case GL_TRIANGLE_STRIP: | ||
| 153 | case GL_TRIANGLE_FAN: | ||
| 154 | return LazyGeometryProgram(programs.triangles, base_bindings, "triangles", 3, | ||
| 155 | "ShaderTriangles"); | ||
| 156 | case GL_TRIANGLES_ADJACENCY: | ||
| 157 | case GL_TRIANGLE_STRIP_ADJACENCY: | ||
| 158 | return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, | ||
| 159 | "triangles_adjacency", 6, "ShaderTrianglesAdjacency"); | ||
| 160 | default: | ||
| 161 | UNREACHABLE_MSG("Unknown primitive mode."); | ||
| 162 | return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints"); | ||
| 163 | } | ||
| 127 | } | 164 | } |
| 128 | 165 | ||
| 129 | GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program, | 166 | GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings, |
| 130 | const std::string& glsl_topology, u32 max_vertices, | 167 | const std::string& glsl_topology, u32 max_vertices, |
| 131 | const std::string& debug_name) { | 168 | const std::string& debug_name) { |
| 132 | if (target_program.handle != 0) { | 169 | if (target_program.handle != 0) { |
| 133 | return target_program.handle; | 170 | return target_program.handle; |
| 134 | } | 171 | } |
| 135 | std::string source = "#version 430 core\n"; | 172 | std::string source = AllocateBindings(base_bindings); |
| 136 | source += "layout (" + glsl_topology + ") in;\n"; | 173 | source += "layout (" + glsl_topology + ") in;\n"; |
| 137 | source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; | 174 | source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; |
| 138 | source += geometry_programs.code; | 175 | source += code; |
| 139 | 176 | ||
| 140 | OGLShader shader; | 177 | OGLShader shader; |
| 141 | shader.Create(source.c_str(), GL_GEOMETRY_SHADER); | 178 | shader.Create(source.c_str(), GL_GEOMETRY_SHADER); |
| 142 | target_program.Create(true, shader.handle); | 179 | target_program.Create(true, shader.handle); |
| 143 | SetShaderUniformBlockBindings(target_program.handle); | ||
| 144 | LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name); | 180 | LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name); |
| 145 | return target_program.handle; | 181 | return target_program.handle; |
| 146 | }; | 182 | }; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 768747968..904d15dd0 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -7,11 +7,15 @@ | |||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <map> | 8 | #include <map> |
| 9 | #include <memory> | 9 | #include <memory> |
| 10 | #include <tuple> | ||
| 11 | |||
| 12 | #include <glad/glad.h> | ||
| 10 | 13 | ||
| 11 | #include "common/assert.h" | 14 | #include "common/assert.h" |
| 12 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 13 | #include "video_core/rasterizer_cache.h" | 16 | #include "video_core/rasterizer_cache.h" |
| 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 17 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 18 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 15 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 19 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| 16 | 20 | ||
| 17 | namespace OpenGL { | 21 | namespace OpenGL { |
| @@ -22,6 +26,16 @@ class RasterizerOpenGL; | |||
| 22 | using Shader = std::shared_ptr<CachedShader>; | 26 | using Shader = std::shared_ptr<CachedShader>; |
| 23 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 27 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 24 | 28 | ||
| 29 | struct BaseBindings { | ||
| 30 | u32 cbuf{}; | ||
| 31 | u32 gmem{}; | ||
| 32 | u32 sampler{}; | ||
| 33 | |||
| 34 | bool operator<(const BaseBindings& rhs) const { | ||
| 35 | return std::tie(cbuf, gmem, sampler) < std::tie(rhs.cbuf, rhs.gmem, rhs.sampler); | ||
| 36 | } | ||
| 37 | }; | ||
| 38 | |||
| 25 | class CachedShader final : public RasterizerCacheObject { | 39 | class CachedShader final : public RasterizerCacheObject { |
| 26 | public: | 40 | public: |
| 27 | CachedShader(VAddr addr, Maxwell::ShaderProgram program_type); | 41 | CachedShader(VAddr addr, Maxwell::ShaderProgram program_type); |
| @@ -43,70 +57,45 @@ public: | |||
| 43 | } | 57 | } |
| 44 | 58 | ||
| 45 | /// Gets the GL program handle for the shader | 59 | /// Gets the GL program handle for the shader |
| 46 | GLuint GetProgramHandle(GLenum primitive_mode) { | 60 | std::tuple<GLuint, BaseBindings> GetProgramHandle(GLenum primitive_mode, |
| 47 | if (program_type != Maxwell::ShaderProgram::Geometry) { | 61 | BaseBindings base_bindings); |
| 48 | return program.handle; | ||
| 49 | } | ||
| 50 | switch (primitive_mode) { | ||
| 51 | case GL_POINTS: | ||
| 52 | return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints"); | ||
| 53 | case GL_LINES: | ||
| 54 | case GL_LINE_STRIP: | ||
| 55 | return LazyGeometryProgram(geometry_programs.lines, "lines", 2, "ShaderLines"); | ||
| 56 | case GL_LINES_ADJACENCY: | ||
| 57 | case GL_LINE_STRIP_ADJACENCY: | ||
| 58 | return LazyGeometryProgram(geometry_programs.lines_adjacency, "lines_adjacency", 4, | ||
| 59 | "ShaderLinesAdjacency"); | ||
| 60 | case GL_TRIANGLES: | ||
| 61 | case GL_TRIANGLE_STRIP: | ||
| 62 | case GL_TRIANGLE_FAN: | ||
| 63 | return LazyGeometryProgram(geometry_programs.triangles, "triangles", 3, | ||
| 64 | "ShaderTriangles"); | ||
| 65 | case GL_TRIANGLES_ADJACENCY: | ||
| 66 | case GL_TRIANGLE_STRIP_ADJACENCY: | ||
| 67 | return LazyGeometryProgram(geometry_programs.triangles_adjacency, "triangles_adjacency", | ||
| 68 | 6, "ShaderTrianglesAdjacency"); | ||
| 69 | default: | ||
| 70 | UNREACHABLE_MSG("Unknown primitive mode."); | ||
| 71 | return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints"); | ||
| 72 | } | ||
| 73 | } | ||
| 74 | 62 | ||
| 75 | /// Gets the GL program resource location for the specified resource, caching as needed | 63 | private: |
| 76 | GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer); | 64 | // Geometry programs. These are needed because GLSL needs an input topology but it's not |
| 65 | // declared by the hardware. Workaround this issue by generating a different shader per input | ||
| 66 | // topology class. | ||
| 67 | struct GeometryPrograms { | ||
| 68 | OGLProgram points; | ||
| 69 | OGLProgram lines; | ||
| 70 | OGLProgram lines_adjacency; | ||
| 71 | OGLProgram triangles; | ||
| 72 | OGLProgram triangles_adjacency; | ||
| 73 | }; | ||
| 77 | 74 | ||
| 78 | /// Gets the GL uniform location for the specified resource, caching as needed | 75 | std::string AllocateBindings(BaseBindings base_bindings); |
| 79 | GLint GetUniformLocation(const GLShader::SamplerEntry& sampler); | 76 | |
| 77 | GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings); | ||
| 80 | 78 | ||
| 81 | private: | ||
| 82 | /// Generates a geometry shader or returns one that already exists. | 79 | /// Generates a geometry shader or returns one that already exists. |
| 83 | GLuint LazyGeometryProgram(OGLProgram& target_program, const std::string& glsl_topology, | 80 | GLuint LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings, |
| 84 | u32 max_vertices, const std::string& debug_name); | 81 | const std::string& glsl_topology, u32 max_vertices, |
| 82 | const std::string& debug_name); | ||
| 85 | 83 | ||
| 86 | void CalculateProperties(); | 84 | void CalculateProperties(); |
| 87 | 85 | ||
| 88 | VAddr addr; | 86 | VAddr addr{}; |
| 89 | std::size_t shader_length; | 87 | std::size_t shader_length{}; |
| 90 | Maxwell::ShaderProgram program_type; | 88 | Maxwell::ShaderProgram program_type{}; |
| 91 | GLShader::ShaderSetup setup; | 89 | GLShader::ShaderSetup setup; |
| 92 | GLShader::ShaderEntries entries; | 90 | GLShader::ShaderEntries entries; |
| 93 | 91 | ||
| 94 | // Non-geometry program. | 92 | std::string code; |
| 95 | OGLProgram program; | ||
| 96 | 93 | ||
| 97 | // Geometry programs. These are needed because GLSL needs an input topology but it's not | 94 | std::map<BaseBindings, OGLProgram> programs; |
| 98 | // declared by the hardware. Workaround this issue by generating a different shader per input | 95 | std::map<BaseBindings, GeometryPrograms> geometry_programs; |
| 99 | // topology class. | ||
| 100 | struct { | ||
| 101 | std::string code; | ||
| 102 | OGLProgram points; | ||
| 103 | OGLProgram lines; | ||
| 104 | OGLProgram lines_adjacency; | ||
| 105 | OGLProgram triangles; | ||
| 106 | OGLProgram triangles_adjacency; | ||
| 107 | } geometry_programs; | ||
| 108 | 96 | ||
| 109 | std::map<u32, GLuint> resource_cache; | 97 | std::map<u32, GLuint> cbuf_resource_cache; |
| 98 | std::map<u32, GLuint> gmem_resource_cache; | ||
| 110 | std::map<u32, GLint> uniform_cache; | 99 | std::map<u32, GLint> uniform_cache; |
| 111 | }; | 100 | }; |
| 112 | 101 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 1bb09e61b..36035d0d2 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -2,247 +2,42 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <map> | 5 | #include <array> |
| 6 | #include <optional> | ||
| 7 | #include <set> | ||
| 8 | #include <string> | 6 | #include <string> |
| 9 | #include <string_view> | 7 | #include <string_view> |
| 10 | #include <unordered_set> | 8 | #include <variant> |
| 11 | 9 | ||
| 12 | #include <fmt/format.h> | 10 | #include <fmt/format.h> |
| 13 | 11 | ||
| 12 | #include "common/alignment.h" | ||
| 14 | #include "common/assert.h" | 13 | #include "common/assert.h" |
| 15 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 16 | #include "video_core/engines/shader_bytecode.h" | 15 | #include "video_core/engines/maxwell_3d.h" |
| 17 | #include "video_core/engines/shader_header.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 16 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 19 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 17 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 18 | #include "video_core/shader/shader_ir.h" | ||
| 20 | 19 | ||
| 21 | namespace OpenGL::GLShader::Decompiler { | 20 | namespace OpenGL::GLShader { |
| 22 | 21 | ||
| 23 | using Tegra::Shader::Attribute; | 22 | using Tegra::Shader::Attribute; |
| 24 | using Tegra::Shader::Instruction; | 23 | using Tegra::Shader::Header; |
| 25 | using Tegra::Shader::LogicOperation; | 24 | using Tegra::Shader::IpaInterpMode; |
| 26 | using Tegra::Shader::OpCode; | 25 | using Tegra::Shader::IpaMode; |
| 26 | using Tegra::Shader::IpaSampleMode; | ||
| 27 | using Tegra::Shader::Register; | 27 | using Tegra::Shader::Register; |
| 28 | using Tegra::Shader::Sampler; | 28 | using namespace VideoCommon::Shader; |
| 29 | using Tegra::Shader::SubOp; | ||
| 30 | 29 | ||
| 31 | constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; | 30 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 32 | constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header); | 31 | using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; |
| 32 | using Operation = const OperationNode&; | ||
| 33 | 33 | ||
| 34 | constexpr u32 MAX_GEOMETRY_BUFFERS = 6; | 34 | enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; |
| 35 | constexpr u32 MAX_ATTRIBUTES = 0x100; // Size in vec4s, this value is untested | 35 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = |
| 36 | static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); | ||
| 37 | constexpr u32 MAX_GLOBALMEMORY_ELEMENTS = | ||
| 38 | static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float); | ||
| 36 | 39 | ||
| 37 | static const char* INTERNAL_FLAG_NAMES[] = {"zero_flag", "sign_flag", "carry_flag", | 40 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; |
| 38 | "overflow_flag"}; | ||
| 39 | |||
| 40 | enum class InternalFlag : u64 { | ||
| 41 | ZeroFlag = 0, | ||
| 42 | SignFlag = 1, | ||
| 43 | CarryFlag = 2, | ||
| 44 | OverflowFlag = 3, | ||
| 45 | Amount | ||
| 46 | }; | ||
| 47 | |||
| 48 | class DecompileFail : public std::runtime_error { | ||
| 49 | public: | ||
| 50 | using std::runtime_error::runtime_error; | ||
| 51 | }; | ||
| 52 | |||
| 53 | /// Generates code to use for a swizzle operation. | ||
| 54 | static std::string GetSwizzle(u64 elem) { | ||
| 55 | ASSERT(elem <= 3); | ||
| 56 | std::string swizzle = "."; | ||
| 57 | swizzle += "xyzw"[elem]; | ||
| 58 | return swizzle; | ||
| 59 | } | ||
| 60 | |||
| 61 | /// Translate topology | ||
| 62 | static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { | ||
| 63 | switch (topology) { | ||
| 64 | case Tegra::Shader::OutputTopology::PointList: | ||
| 65 | return "points"; | ||
| 66 | case Tegra::Shader::OutputTopology::LineStrip: | ||
| 67 | return "line_strip"; | ||
| 68 | case Tegra::Shader::OutputTopology::TriangleStrip: | ||
| 69 | return "triangle_strip"; | ||
| 70 | default: | ||
| 71 | UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology)); | ||
| 72 | return "points"; | ||
| 73 | } | ||
| 74 | } | ||
| 75 | |||
| 76 | /// Describes the behaviour of code path of a given entry point and a return point. | ||
| 77 | enum class ExitMethod { | ||
| 78 | Undetermined, ///< Internal value. Only occur when analyzing JMP loop. | ||
| 79 | AlwaysReturn, ///< All code paths reach the return point. | ||
| 80 | Conditional, ///< Code path reaches the return point or an END instruction conditionally. | ||
| 81 | AlwaysEnd, ///< All code paths reach a END instruction. | ||
| 82 | }; | ||
| 83 | |||
| 84 | /// A subroutine is a range of code refereced by a CALL, IF or LOOP instruction. | ||
| 85 | struct Subroutine { | ||
| 86 | /// Generates a name suitable for GLSL source code. | ||
| 87 | std::string GetName() const { | ||
| 88 | return "sub_" + std::to_string(begin) + '_' + std::to_string(end) + '_' + suffix; | ||
| 89 | } | ||
| 90 | |||
| 91 | u32 begin; ///< Entry point of the subroutine. | ||
| 92 | u32 end; ///< Return point of the subroutine. | ||
| 93 | const std::string& suffix; ///< Suffix of the shader, used to make a unique subroutine name | ||
| 94 | ExitMethod exit_method; ///< Exit method of the subroutine. | ||
| 95 | std::set<u32> labels; ///< Addresses refereced by JMP instructions. | ||
| 96 | |||
| 97 | bool operator<(const Subroutine& rhs) const { | ||
| 98 | return std::tie(begin, end) < std::tie(rhs.begin, rhs.end); | ||
| 99 | } | ||
| 100 | }; | ||
| 101 | |||
| 102 | /// Analyzes shader code and produces a set of subroutines. | ||
| 103 | class ControlFlowAnalyzer { | ||
| 104 | public: | ||
| 105 | ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset, const std::string& suffix) | ||
| 106 | : program_code(program_code), shader_coverage_begin(main_offset), | ||
| 107 | shader_coverage_end(main_offset + 1) { | ||
| 108 | |||
| 109 | // Recursively finds all subroutines. | ||
| 110 | const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END, suffix); | ||
| 111 | if (program_main.exit_method != ExitMethod::AlwaysEnd) | ||
| 112 | throw DecompileFail("Program does not always end"); | ||
| 113 | } | ||
| 114 | |||
| 115 | std::set<Subroutine> GetSubroutines() { | ||
| 116 | return std::move(subroutines); | ||
| 117 | } | ||
| 118 | |||
| 119 | std::size_t GetShaderLength() const { | ||
| 120 | return shader_coverage_end * sizeof(u64); | ||
| 121 | } | ||
| 122 | |||
| 123 | private: | ||
| 124 | const ProgramCode& program_code; | ||
| 125 | std::set<Subroutine> subroutines; | ||
| 126 | std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; | ||
| 127 | u32 shader_coverage_begin; | ||
| 128 | u32 shader_coverage_end; | ||
| 129 | |||
| 130 | /// Adds and analyzes a new subroutine if it is not added yet. | ||
| 131 | const Subroutine& AddSubroutine(u32 begin, u32 end, const std::string& suffix) { | ||
| 132 | Subroutine subroutine{begin, end, suffix, ExitMethod::Undetermined, {}}; | ||
| 133 | |||
| 134 | const auto iter = subroutines.find(subroutine); | ||
| 135 | if (iter != subroutines.end()) { | ||
| 136 | return *iter; | ||
| 137 | } | ||
| 138 | |||
| 139 | subroutine.exit_method = Scan(begin, end, subroutine.labels); | ||
| 140 | if (subroutine.exit_method == ExitMethod::Undetermined) { | ||
| 141 | throw DecompileFail("Recursive function detected"); | ||
| 142 | } | ||
| 143 | |||
| 144 | return *subroutines.insert(std::move(subroutine)).first; | ||
| 145 | } | ||
| 146 | |||
| 147 | /// Merges exit method of two parallel branches. | ||
| 148 | static ExitMethod ParallelExit(ExitMethod a, ExitMethod b) { | ||
| 149 | if (a == ExitMethod::Undetermined) { | ||
| 150 | return b; | ||
| 151 | } | ||
| 152 | if (b == ExitMethod::Undetermined) { | ||
| 153 | return a; | ||
| 154 | } | ||
| 155 | if (a == b) { | ||
| 156 | return a; | ||
| 157 | } | ||
| 158 | return ExitMethod::Conditional; | ||
| 159 | } | ||
| 160 | |||
| 161 | /// Scans a range of code for labels and determines the exit method. | ||
| 162 | ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) { | ||
| 163 | const auto [iter, inserted] = | ||
| 164 | exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); | ||
| 165 | ExitMethod& exit_method = iter->second; | ||
| 166 | if (!inserted) | ||
| 167 | return exit_method; | ||
| 168 | |||
| 169 | for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) { | ||
| 170 | shader_coverage_begin = std::min(shader_coverage_begin, offset); | ||
| 171 | shader_coverage_end = std::max(shader_coverage_end, offset + 1); | ||
| 172 | |||
| 173 | const Instruction instr = {program_code[offset]}; | ||
| 174 | if (const auto opcode = OpCode::Decode(instr)) { | ||
| 175 | switch (opcode->get().GetId()) { | ||
| 176 | case OpCode::Id::EXIT: { | ||
| 177 | // The EXIT instruction can be predicated, which means that the shader can | ||
| 178 | // conditionally end on this instruction. We have to consider the case where the | ||
| 179 | // condition is not met and check the exit method of that other basic block. | ||
| 180 | using Tegra::Shader::Pred; | ||
| 181 | if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { | ||
| 182 | return exit_method = ExitMethod::AlwaysEnd; | ||
| 183 | } else { | ||
| 184 | const ExitMethod not_met = Scan(offset + 1, end, labels); | ||
| 185 | return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met); | ||
| 186 | } | ||
| 187 | } | ||
| 188 | case OpCode::Id::BRA: { | ||
| 189 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 190 | labels.insert(target); | ||
| 191 | const ExitMethod no_jmp = Scan(offset + 1, end, labels); | ||
| 192 | const ExitMethod jmp = Scan(target, end, labels); | ||
| 193 | return exit_method = ParallelExit(no_jmp, jmp); | ||
| 194 | } | ||
| 195 | case OpCode::Id::SSY: | ||
| 196 | case OpCode::Id::PBK: { | ||
| 197 | // The SSY and PBK use a similar encoding as the BRA instruction. | ||
| 198 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 199 | "Constant buffer branching is not supported"); | ||
| 200 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 201 | labels.insert(target); | ||
| 202 | // Continue scanning for an exit method. | ||
| 203 | break; | ||
| 204 | } | ||
| 205 | } | ||
| 206 | } | ||
| 207 | } | ||
| 208 | return exit_method = ExitMethod::AlwaysReturn; | ||
| 209 | } | ||
| 210 | }; | ||
| 211 | |||
| 212 | template <typename T> | ||
| 213 | class ShaderScopedScope { | ||
| 214 | public: | ||
| 215 | explicit ShaderScopedScope(T& writer, std::string_view begin_expr, std::string end_expr) | ||
| 216 | : writer(writer), end_expr(std::move(end_expr)) { | ||
| 217 | |||
| 218 | if (begin_expr.empty()) { | ||
| 219 | writer.AddLine('{'); | ||
| 220 | } else { | ||
| 221 | writer.AddExpression(begin_expr); | ||
| 222 | writer.AddLine(" {"); | ||
| 223 | } | ||
| 224 | ++writer.scope; | ||
| 225 | } | ||
| 226 | |||
| 227 | ShaderScopedScope(const ShaderScopedScope&) = delete; | ||
| 228 | |||
| 229 | ~ShaderScopedScope() { | ||
| 230 | --writer.scope; | ||
| 231 | if (end_expr.empty()) { | ||
| 232 | writer.AddLine('}'); | ||
| 233 | } else { | ||
| 234 | writer.AddExpression("} "); | ||
| 235 | writer.AddExpression(end_expr); | ||
| 236 | writer.AddLine(';'); | ||
| 237 | } | ||
| 238 | } | ||
| 239 | |||
| 240 | ShaderScopedScope& operator=(const ShaderScopedScope&) = delete; | ||
| 241 | |||
| 242 | private: | ||
| 243 | T& writer; | ||
| 244 | std::string end_expr; | ||
| 245 | }; | ||
| 246 | 41 | ||
| 247 | class ShaderWriter { | 42 | class ShaderWriter { |
| 248 | public: | 43 | public: |
| @@ -271,16 +66,17 @@ public: | |||
| 271 | shader_source += '\n'; | 66 | shader_source += '\n'; |
| 272 | } | 67 | } |
| 273 | 68 | ||
| 274 | std::string GetResult() { | 69 | std::string GenerateTemporal() { |
| 275 | return std::move(shader_source); | 70 | std::string temporal = "tmp"; |
| 71 | temporal += std::to_string(temporal_index++); | ||
| 72 | return temporal; | ||
| 276 | } | 73 | } |
| 277 | 74 | ||
| 278 | ShaderScopedScope<ShaderWriter> Scope(std::string_view begin_expr = {}, | 75 | std::string GetResult() { |
| 279 | std::string end_expr = {}) { | 76 | return std::move(shader_source); |
| 280 | return ShaderScopedScope(*this, begin_expr, end_expr); | ||
| 281 | } | 77 | } |
| 282 | 78 | ||
| 283 | int scope = 0; | 79 | s32 scope = 0; |
| 284 | 80 | ||
| 285 | private: | 81 | private: |
| 286 | void AppendIndentation() { | 82 | void AppendIndentation() { |
| @@ -288,3663 +84,1483 @@ private: | |||
| 288 | } | 84 | } |
| 289 | 85 | ||
| 290 | std::string shader_source; | 86 | std::string shader_source; |
| 87 | u32 temporal_index = 1; | ||
| 291 | }; | 88 | }; |
| 292 | 89 | ||
| 293 | /** | 90 | /// Generates code to use for a swizzle operation. |
| 294 | * Represents an emulated shader register, used to track the state of that register for emulation | 91 | static std::string GetSwizzle(u32 elem) { |
| 295 | * with GLSL. At this time, a register can be used as a float or an integer. This class is used for | 92 | ASSERT(elem <= 3); |
| 296 | * bookkeeping within the GLSL program. | 93 | std::string swizzle = "."; |
| 297 | */ | 94 | swizzle += "xyzw"[elem]; |
| 298 | class GLSLRegister { | 95 | return swizzle; |
| 299 | public: | 96 | } |
| 300 | enum class Type { | ||
| 301 | Float, | ||
| 302 | Integer, | ||
| 303 | UnsignedInteger, | ||
| 304 | }; | ||
| 305 | |||
| 306 | GLSLRegister(std::size_t index, const std::string& suffix) : index{index}, suffix{suffix} {} | ||
| 307 | |||
| 308 | /// Gets the GLSL type string for a register | ||
| 309 | static std::string GetTypeString() { | ||
| 310 | return "float"; | ||
| 311 | } | ||
| 312 | |||
| 313 | /// Gets the GLSL register prefix string, used for declarations and referencing | ||
| 314 | static std::string GetPrefixString() { | ||
| 315 | return "reg_"; | ||
| 316 | } | ||
| 317 | |||
| 318 | /// Returns a GLSL string representing the current state of the register | ||
| 319 | std::string GetString() const { | ||
| 320 | return GetPrefixString() + std::to_string(index) + '_' + suffix; | ||
| 321 | } | ||
| 322 | |||
| 323 | /// Returns the index of the register | ||
| 324 | std::size_t GetIndex() const { | ||
| 325 | return index; | ||
| 326 | } | ||
| 327 | |||
| 328 | private: | ||
| 329 | const std::size_t index; | ||
| 330 | const std::string& suffix; | ||
| 331 | }; | ||
| 332 | |||
| 333 | /** | ||
| 334 | * Used to manage shader registers that are emulated with GLSL. This class keeps track of the state | ||
| 335 | * of all registers (e.g. whether they are currently being used as Floats or Integers), and | ||
| 336 | * generates the necessary GLSL code to perform conversions as needed. This class is used for | ||
| 337 | * bookkeeping within the GLSL program. | ||
| 338 | */ | ||
| 339 | class GLSLRegisterManager { | ||
| 340 | public: | ||
| 341 | GLSLRegisterManager(ShaderWriter& shader, ShaderWriter& declarations, | ||
| 342 | const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix, | ||
| 343 | const Tegra::Shader::Header& header) | ||
| 344 | : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header}, | ||
| 345 | fixed_pipeline_output_attributes_used{}, local_memory_size{0} { | ||
| 346 | BuildRegisterList(); | ||
| 347 | BuildInputList(); | ||
| 348 | } | ||
| 349 | |||
| 350 | void SetConditionalCodesFromExpression(const std::string& expresion) { | ||
| 351 | SetInternalFlag(InternalFlag::ZeroFlag, "(" + expresion + ") == 0"); | ||
| 352 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete."); | ||
| 353 | } | ||
| 354 | |||
| 355 | void SetConditionalCodesFromRegister(const Register& reg, u64 dest_elem = 0) { | ||
| 356 | SetConditionalCodesFromExpression(GetRegister(reg, static_cast<u32>(dest_elem))); | ||
| 357 | } | ||
| 358 | |||
| 359 | /** | ||
| 360 | * Returns code that does an integer size conversion for the specified size. | ||
| 361 | * @param value Value to perform integer size conversion on. | ||
| 362 | * @param size Register size to use for conversion instructions. | ||
| 363 | * @returns GLSL string corresponding to the value converted to the specified size. | ||
| 364 | */ | ||
| 365 | static std::string ConvertIntegerSize(const std::string& value, Register::Size size) { | ||
| 366 | switch (size) { | ||
| 367 | case Register::Size::Byte: | ||
| 368 | return "((" + value + " << 24) >> 24)"; | ||
| 369 | case Register::Size::Short: | ||
| 370 | return "((" + value + " << 16) >> 16)"; | ||
| 371 | case Register::Size::Word: | ||
| 372 | // Default - do nothing | ||
| 373 | return value; | ||
| 374 | default: | ||
| 375 | UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size)); | ||
| 376 | return value; | ||
| 377 | } | ||
| 378 | } | ||
| 379 | |||
| 380 | /** | ||
| 381 | * Gets a register as an float. | ||
| 382 | * @param reg The register to get. | ||
| 383 | * @param elem The element to use for the operation. | ||
| 384 | * @returns GLSL string corresponding to the register as a float. | ||
| 385 | */ | ||
| 386 | std::string GetRegisterAsFloat(const Register& reg, unsigned elem = 0) { | ||
| 387 | return GetRegister(reg, elem); | ||
| 388 | } | ||
| 389 | |||
| 390 | /** | ||
| 391 | * Gets a register as an integer. | ||
| 392 | * @param reg The register to get. | ||
| 393 | * @param elem The element to use for the operation. | ||
| 394 | * @param is_signed Whether to get the register as a signed (or unsigned) integer. | ||
| 395 | * @param size Register size to use for conversion instructions. | ||
| 396 | * @returns GLSL string corresponding to the register as an integer. | ||
| 397 | */ | ||
| 398 | std::string GetRegisterAsInteger(const Register& reg, unsigned elem = 0, bool is_signed = true, | ||
| 399 | Register::Size size = Register::Size::Word) { | ||
| 400 | const std::string func{is_signed ? "floatBitsToInt" : "floatBitsToUint"}; | ||
| 401 | const std::string value{func + '(' + GetRegister(reg, elem) + ')'}; | ||
| 402 | return ConvertIntegerSize(value, size); | ||
| 403 | } | ||
| 404 | |||
| 405 | /** | ||
| 406 | * Writes code that does a register assignment to float value operation. | ||
| 407 | * @param reg The destination register to use. | ||
| 408 | * @param elem The element to use for the operation. | ||
| 409 | * @param value The code representing the value to assign. | ||
| 410 | * @param dest_num_components Number of components in the destination. | ||
| 411 | * @param value_num_components Number of components in the value. | ||
| 412 | * @param is_saturated Optional, when True, saturates the provided value. | ||
| 413 | * @param sets_cc Optional, when True, sets the corresponding values to the implemented | ||
| 414 | * condition flags. | ||
| 415 | * @param dest_elem Optional, the destination element to use for the operation. | ||
| 416 | */ | ||
| 417 | void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value, | ||
| 418 | u64 dest_num_components, u64 value_num_components, | ||
| 419 | bool is_saturated = false, bool sets_cc = false, u64 dest_elem = 0, | ||
| 420 | bool precise = false) { | ||
| 421 | const std::string clamped_value = is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value; | ||
| 422 | SetRegister(reg, elem, clamped_value, dest_num_components, value_num_components, dest_elem, | ||
| 423 | precise); | ||
| 424 | if (sets_cc) { | ||
| 425 | if (reg == Register::ZeroIndex) { | ||
| 426 | SetConditionalCodesFromExpression(clamped_value); | ||
| 427 | } else { | ||
| 428 | SetConditionalCodesFromRegister(reg, dest_elem); | ||
| 429 | } | ||
| 430 | } | ||
| 431 | } | ||
| 432 | |||
| 433 | /** | ||
| 434 | * Writes code that does a register assignment to integer value operation. | ||
| 435 | * @param reg The destination register to use. | ||
| 436 | * @param elem The element to use for the operation. | ||
| 437 | * @param value The code representing the value to assign. | ||
| 438 | * @param dest_num_components Number of components in the destination. | ||
| 439 | * @param value_num_components Number of components in the value. | ||
| 440 | * @param is_saturated Optional, when True, saturates the provided value. | ||
| 441 | * @param sets_cc Optional, when True, sets the corresponding values to the implemented | ||
| 442 | * condition flags. | ||
| 443 | * @param dest_elem Optional, the destination element to use for the operation. | ||
| 444 | * @param size Register size to use for conversion instructions. | ||
| 445 | */ | ||
| 446 | void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem, | ||
| 447 | const std::string& value, u64 dest_num_components, | ||
| 448 | u64 value_num_components, bool is_saturated = false, | ||
| 449 | bool sets_cc = false, u64 dest_elem = 0, | ||
| 450 | Register::Size size = Register::Size::Word) { | ||
| 451 | UNIMPLEMENTED_IF(is_saturated); | ||
| 452 | const std::string final_value = ConvertIntegerSize(value, size); | ||
| 453 | const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; | ||
| 454 | |||
| 455 | SetRegister(reg, elem, func + '(' + final_value + ')', dest_num_components, | ||
| 456 | value_num_components, dest_elem, false); | ||
| 457 | |||
| 458 | if (sets_cc) { | ||
| 459 | if (reg == Register::ZeroIndex) { | ||
| 460 | SetConditionalCodesFromExpression(final_value); | ||
| 461 | } else { | ||
| 462 | SetConditionalCodesFromRegister(reg, dest_elem); | ||
| 463 | } | ||
| 464 | } | ||
| 465 | } | ||
| 466 | |||
| 467 | /** | ||
| 468 | * Writes code that does a register assignment to a half float value operation. | ||
| 469 | * @param reg The destination register to use. | ||
| 470 | * @param elem The element to use for the operation. | ||
| 471 | * @param value The code representing the value to assign. Type has to be half float. | ||
| 472 | * @param merge Half float kind of assignment. | ||
| 473 | * @param dest_num_components Number of components in the destination. | ||
| 474 | * @param value_num_components Number of components in the value. | ||
| 475 | * @param is_saturated Optional, when True, saturates the provided value. | ||
| 476 | * @param dest_elem Optional, the destination element to use for the operation. | ||
| 477 | */ | ||
| 478 | void SetRegisterToHalfFloat(const Register& reg, u64 elem, const std::string& value, | ||
| 479 | Tegra::Shader::HalfMerge merge, u64 dest_num_components, | ||
| 480 | u64 value_num_components, bool is_saturated = false, | ||
| 481 | u64 dest_elem = 0) { | ||
| 482 | UNIMPLEMENTED_IF(is_saturated); | ||
| 483 | |||
| 484 | const std::string result = [&]() { | ||
| 485 | switch (merge) { | ||
| 486 | case Tegra::Shader::HalfMerge::H0_H1: | ||
| 487 | return "uintBitsToFloat(packHalf2x16(" + value + "))"; | ||
| 488 | case Tegra::Shader::HalfMerge::F32: | ||
| 489 | // Half float instructions take the first component when doing a float cast. | ||
| 490 | return "float(" + value + ".x)"; | ||
| 491 | case Tegra::Shader::HalfMerge::Mrg_H0: | ||
| 492 | // TODO(Rodrigo): I guess Mrg_H0 and Mrg_H1 take their respective component from the | ||
| 493 | // pack. I couldn't test this on hardware but it shouldn't really matter since most | ||
| 494 | // of the time when a Mrg_* flag is used both components will be mirrored. That | ||
| 495 | // being said, it deserves a test. | ||
| 496 | return "uintBitsToFloat((" + GetRegisterAsInteger(reg, 0, false) + | ||
| 497 | " & 0xffff0000) | (packHalf2x16(" + value + ") & 0x0000ffff))"; | ||
| 498 | case Tegra::Shader::HalfMerge::Mrg_H1: | ||
| 499 | return "uintBitsToFloat((" + GetRegisterAsInteger(reg, 0, false) + | ||
| 500 | " & 0x0000ffff) | (packHalf2x16(" + value + ") & 0xffff0000))"; | ||
| 501 | default: | ||
| 502 | UNREACHABLE(); | ||
| 503 | return std::string("0"); | ||
| 504 | } | ||
| 505 | }(); | ||
| 506 | |||
| 507 | SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem, false); | ||
| 508 | } | ||
| 509 | |||
| 510 | /** | ||
| 511 | * Writes code that does a register assignment to input attribute operation. Input attributes | ||
| 512 | * are stored as floats, so this may require conversion. | ||
| 513 | * @param reg The destination register to use. | ||
| 514 | * @param elem The element to use for the operation. | ||
| 515 | * @param attribute The input attribute to use as the source value. | ||
| 516 | * @param input_mode The input mode. | ||
| 517 | * @param vertex The register that decides which vertex to read from (used in GS). | ||
| 518 | */ | ||
| 519 | void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute, | ||
| 520 | const Tegra::Shader::IpaMode& input_mode, | ||
| 521 | std::optional<Register> vertex = {}) { | ||
| 522 | const std::string dest = GetRegisterAsFloat(reg); | ||
| 523 | const std::string src = GetInputAttribute(attribute, input_mode, vertex) + GetSwizzle(elem); | ||
| 524 | shader.AddLine(dest + " = " + src + ';'); | ||
| 525 | } | ||
| 526 | |||
| 527 | std::string GetLocalMemoryAsFloat(const std::string& index) { | ||
| 528 | return "lmem[" + index + ']'; | ||
| 529 | } | ||
| 530 | 97 | ||
| 531 | std::string GetLocalMemoryAsInteger(const std::string& index, bool is_signed = false) { | 98 | /// Translate topology |
| 532 | const std::string func{is_signed ? "floatToIntBits" : "floatBitsToUint"}; | 99 | static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { |
| 533 | return func + "(lmem[" + index + "])"; | 100 | switch (topology) { |
| 101 | case Tegra::Shader::OutputTopology::PointList: | ||
| 102 | return "points"; | ||
| 103 | case Tegra::Shader::OutputTopology::LineStrip: | ||
| 104 | return "line_strip"; | ||
| 105 | case Tegra::Shader::OutputTopology::TriangleStrip: | ||
| 106 | return "triangle_strip"; | ||
| 107 | default: | ||
| 108 | UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology)); | ||
| 109 | return "points"; | ||
| 534 | } | 110 | } |
| 111 | } | ||
| 535 | 112 | ||
| 536 | void SetLocalMemoryAsFloat(const std::string& index, const std::string& value) { | 113 | /// Returns true if an object has to be treated as precise |
| 537 | shader.AddLine("lmem[" + index + "] = " + value + ';'); | 114 | static bool IsPrecise(Operation operand) { |
| 538 | } | 115 | const auto& meta = operand.GetMeta(); |
| 539 | 116 | ||
| 540 | void SetLocalMemoryAsInteger(const std::string& index, const std::string& value, | 117 | if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { |
| 541 | bool is_signed = false) { | 118 | return arithmetic->precise; |
| 542 | const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; | ||
| 543 | shader.AddLine("lmem[" + index + "] = " + func + '(' + value + ");"); | ||
| 544 | } | 119 | } |
| 545 | 120 | if (const auto half_arithmetic = std::get_if<MetaHalfArithmetic>(&meta)) { | |
| 546 | std::string GetConditionCode(const Tegra::Shader::ConditionCode cc) const { | 121 | return half_arithmetic->precise; |
| 547 | switch (cc) { | ||
| 548 | case Tegra::Shader::ConditionCode::NEU: | ||
| 549 | return "!(" + GetInternalFlag(InternalFlag::ZeroFlag) + ')'; | ||
| 550 | default: | ||
| 551 | UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc)); | ||
| 552 | return "false"; | ||
| 553 | } | ||
| 554 | } | 122 | } |
| 123 | return false; | ||
| 124 | } | ||
| 555 | 125 | ||
| 556 | std::string GetInternalFlag(const InternalFlag flag) const { | 126 | static bool IsPrecise(Node node) { |
| 557 | const auto index = static_cast<u32>(flag); | 127 | if (const auto operation = std::get_if<OperationNode>(node)) { |
| 558 | ASSERT(index < static_cast<u32>(InternalFlag::Amount)); | 128 | return IsPrecise(*operation); |
| 559 | |||
| 560 | return std::string(INTERNAL_FLAG_NAMES[index]) + '_' + suffix; | ||
| 561 | } | 129 | } |
| 130 | return false; | ||
| 131 | } | ||
| 562 | 132 | ||
| 563 | void SetInternalFlag(const InternalFlag flag, const std::string& value) const { | 133 | class GLSLDecompiler final { |
| 564 | shader.AddLine(GetInternalFlag(flag) + " = " + value + ';'); | 134 | public: |
| 565 | } | 135 | explicit GLSLDecompiler(const ShaderIR& ir, ShaderStage stage, std::string suffix) |
| 136 | : ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} | ||
| 566 | 137 | ||
| 567 | /** | 138 | void Decompile() { |
| 568 | * Writes code that does a output attribute assignment to register operation. Output attributes | 139 | DeclareVertex(); |
| 569 | * are stored as floats, so this may require conversion. | 140 | DeclareGeometry(); |
| 570 | * @param attribute The destination output attribute. | 141 | DeclareRegisters(); |
| 571 | * @param elem The element to use for the operation. | 142 | DeclarePredicates(); |
| 572 | * @param val_reg The register to use as the source value. | 143 | DeclareLocalMemory(); |
| 573 | * @param buf_reg The register that tells which buffer to write to (used in geometry shaders). | 144 | DeclareInternalFlags(); |
| 574 | */ | 145 | DeclareInputAttributes(); |
| 575 | void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& val_reg, | 146 | DeclareOutputAttributes(); |
| 576 | const Register& buf_reg) { | 147 | DeclareConstantBuffers(); |
| 577 | const std::string dest = GetOutputAttribute(attribute); | 148 | DeclareGlobalMemory(); |
| 578 | const std::string src = GetRegisterAsFloat(val_reg); | 149 | DeclareSamplers(); |
| 579 | if (dest.empty()) | ||
| 580 | return; | ||
| 581 | 150 | ||
| 582 | // Can happen with unknown/unimplemented output attributes, in which case we ignore the | 151 | code.AddLine("void execute_" + suffix + "() {"); |
| 583 | // instruction for now. | 152 | ++code.scope; |
| 584 | if (stage == Maxwell3D::Regs::ShaderStage::Geometry) { | ||
| 585 | // TODO(Rodrigo): nouveau sets some attributes after setting emitting a geometry | ||
| 586 | // shader. These instructions use a dirty register as buffer index, to avoid some | ||
| 587 | // drivers from complaining about out of boundary writes, guard them. | ||
| 588 | const std::string buf_index{"((" + GetRegisterAsInteger(buf_reg) + ") % " + | ||
| 589 | std::to_string(MAX_GEOMETRY_BUFFERS) + ')'}; | ||
| 590 | shader.AddLine("amem[" + buf_index + "][" + | ||
| 591 | std::to_string(static_cast<u32>(attribute)) + ']' + GetSwizzle(elem) + | ||
| 592 | " = " + src + ';'); | ||
| 593 | return; | ||
| 594 | } | ||
| 595 | 153 | ||
| 596 | switch (attribute) { | 154 | // VM's program counter |
| 597 | case Attribute::Index::ClipDistances0123: | 155 | const auto first_address = ir.GetBasicBlocks().begin()->first; |
| 598 | case Attribute::Index::ClipDistances4567: { | 156 | code.AddLine("uint jmp_to = " + std::to_string(first_address) + "u;"); |
| 599 | const u64 index = (attribute == Attribute::Index::ClipDistances4567 ? 4 : 0) + elem; | ||
| 600 | UNIMPLEMENTED_IF_MSG( | ||
| 601 | ((header.vtg.clip_distances >> index) & 1) == 0, | ||
| 602 | "Shader is setting gl_ClipDistance{} without enabling it in the header", index); | ||
| 603 | |||
| 604 | clip_distances[index] = true; | ||
| 605 | fixed_pipeline_output_attributes_used.insert(attribute); | ||
| 606 | shader.AddLine(dest + '[' + std::to_string(index) + "] = " + src + ';'); | ||
| 607 | break; | ||
| 608 | } | ||
| 609 | case Attribute::Index::PointSize: | ||
| 610 | fixed_pipeline_output_attributes_used.insert(attribute); | ||
| 611 | shader.AddLine(dest + " = " + src + ';'); | ||
| 612 | break; | ||
| 613 | default: | ||
| 614 | shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';'); | ||
| 615 | break; | ||
| 616 | } | ||
| 617 | } | ||
| 618 | 157 | ||
| 619 | /// Generates code representing a uniform (C buffer) register, interpreted as the input type. | 158 | // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems |
| 620 | std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type, | 159 | // unlikely that shaders will use 20 nested SSYs and PBKs. |
| 621 | Register::Size size = Register::Size::Word) { | 160 | constexpr u32 FLOW_STACK_SIZE = 20; |
| 622 | declr_const_buffers[index].MarkAsUsed(index, offset, stage); | 161 | code.AddLine(fmt::format("uint flow_stack[{}];", FLOW_STACK_SIZE)); |
| 623 | std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset / 4) + "][" + | 162 | code.AddLine("uint flow_stack_top = 0u;"); |
| 624 | std::to_string(offset % 4) + ']'; | ||
| 625 | 163 | ||
| 626 | if (type == GLSLRegister::Type::Float) { | 164 | code.AddLine("while (true) {"); |
| 627 | // Do nothing, default | 165 | ++code.scope; |
| 628 | } else if (type == GLSLRegister::Type::Integer) { | ||
| 629 | value = "floatBitsToInt(" + value + ')'; | ||
| 630 | } else if (type == GLSLRegister::Type::UnsignedInteger) { | ||
| 631 | value = "floatBitsToUint(" + value + ')'; | ||
| 632 | } else { | ||
| 633 | UNREACHABLE(); | ||
| 634 | } | ||
| 635 | 166 | ||
| 636 | return ConvertIntegerSize(value, size); | 167 | code.AddLine("switch (jmp_to) {"); |
| 637 | } | ||
| 638 | 168 | ||
| 639 | std::string GetUniformIndirect(u64 cbuf_index, s64 offset, const std::string& index_str, | 169 | for (const auto& pair : ir.GetBasicBlocks()) { |
| 640 | GLSLRegister::Type type) { | 170 | const auto [address, bb] = pair; |
| 641 | declr_const_buffers[cbuf_index].MarkAsUsedIndirect(cbuf_index, stage); | 171 | code.AddLine(fmt::format("case 0x{:x}u: {{", address)); |
| 172 | ++code.scope; | ||
| 642 | 173 | ||
| 643 | const std::string final_offset = fmt::format("({} + {})", index_str, offset / 4); | 174 | VisitBasicBlock(bb); |
| 644 | const std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" + | ||
| 645 | final_offset + " % 4]"; | ||
| 646 | 175 | ||
| 647 | if (type == GLSLRegister::Type::Float) { | 176 | --code.scope; |
| 648 | return value; | 177 | code.AddLine('}'); |
| 649 | } else if (type == GLSLRegister::Type::Integer) { | ||
| 650 | return "floatBitsToInt(" + value + ')'; | ||
| 651 | } else { | ||
| 652 | UNREACHABLE(); | ||
| 653 | return value; | ||
| 654 | } | 178 | } |
| 655 | } | ||
| 656 | |||
| 657 | /// Add declarations. | ||
| 658 | void GenerateDeclarations(const std::string& suffix) { | ||
| 659 | GenerateVertex(); | ||
| 660 | GenerateRegisters(suffix); | ||
| 661 | GenerateLocalMemory(); | ||
| 662 | GenerateInternalFlags(); | ||
| 663 | GenerateInputAttrs(); | ||
| 664 | GenerateOutputAttrs(); | ||
| 665 | GenerateConstBuffers(); | ||
| 666 | GenerateSamplers(); | ||
| 667 | GenerateGeometry(); | ||
| 668 | } | ||
| 669 | 179 | ||
| 670 | /// Returns a list of constant buffer declarations. | 180 | code.AddLine("default: return;"); |
| 671 | std::vector<ConstBufferEntry> GetConstBuffersDeclarations() const { | 181 | code.AddLine('}'); |
| 672 | std::vector<ConstBufferEntry> result; | ||
| 673 | std::copy_if(declr_const_buffers.begin(), declr_const_buffers.end(), | ||
| 674 | std::back_inserter(result), [](const auto& entry) { return entry.IsUsed(); }); | ||
| 675 | return result; | ||
| 676 | } | ||
| 677 | 182 | ||
| 678 | /// Returns a list of samplers used in the shader. | 183 | for (std::size_t i = 0; i < 2; ++i) { |
| 679 | const std::vector<SamplerEntry>& GetSamplers() const { | 184 | --code.scope; |
| 680 | return used_samplers; | 185 | code.AddLine('}'); |
| 681 | } | ||
| 682 | |||
| 683 | /// Returns an array of the used clip distances. | ||
| 684 | const std::array<bool, Maxwell::NumClipDistances>& GetClipDistances() const { | ||
| 685 | return clip_distances; | ||
| 686 | } | ||
| 687 | |||
| 688 | /// Returns the GLSL sampler used for the input shader sampler, and creates a new one if | ||
| 689 | /// necessary. | ||
| 690 | std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type, | ||
| 691 | bool is_array, bool is_shadow) { | ||
| 692 | const auto offset = static_cast<std::size_t>(sampler.index.Value()); | ||
| 693 | |||
| 694 | // If this sampler has already been used, return the existing mapping. | ||
| 695 | const auto itr = | ||
| 696 | std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 697 | [&](const SamplerEntry& entry) { return entry.GetOffset() == offset; }); | ||
| 698 | |||
| 699 | if (itr != used_samplers.end()) { | ||
| 700 | ASSERT(itr->GetType() == type && itr->IsArray() == is_array && | ||
| 701 | itr->IsShadow() == is_shadow); | ||
| 702 | return itr->GetName(); | ||
| 703 | } | 186 | } |
| 704 | |||
| 705 | // Otherwise create a new mapping for this sampler | ||
| 706 | const std::size_t next_index = used_samplers.size(); | ||
| 707 | const SamplerEntry entry{stage, offset, next_index, type, is_array, is_shadow}; | ||
| 708 | used_samplers.emplace_back(entry); | ||
| 709 | return entry.GetName(); | ||
| 710 | } | 187 | } |
| 711 | 188 | ||
| 712 | void SetLocalMemory(u64 lmem) { | 189 | std::string GetResult() { |
| 713 | local_memory_size = lmem; | 190 | return code.GetResult(); |
| 714 | } | 191 | } |
| 715 | 192 | ||
| 716 | private: | 193 | ShaderEntries GetShaderEntries() const { |
| 717 | /// Generates declarations for registers. | 194 | ShaderEntries entries; |
| 718 | void GenerateRegisters(const std::string& suffix) { | 195 | for (const auto& cbuf : ir.GetConstantBuffers()) { |
| 719 | for (const auto& reg : regs) { | 196 | entries.const_buffers.emplace_back(cbuf.second, stage, GetConstBufferBlock(cbuf.first), |
| 720 | declarations.AddLine(GLSLRegister::GetTypeString() + ' ' + reg.GetPrefixString() + | 197 | cbuf.first); |
| 721 | std::to_string(reg.GetIndex()) + '_' + suffix + " = 0;"); | ||
| 722 | } | 198 | } |
| 723 | declarations.AddNewLine(); | 199 | for (const auto& sampler : ir.GetSamplers()) { |
| 724 | } | 200 | entries.samplers.emplace_back(sampler, stage, GetSampler(sampler)); |
| 725 | |||
| 726 | /// Generates declarations for local memory. | ||
| 727 | void GenerateLocalMemory() { | ||
| 728 | if (local_memory_size > 0) { | ||
| 729 | declarations.AddLine("float lmem[" + std::to_string((local_memory_size - 1 + 4) / 4) + | ||
| 730 | "];"); | ||
| 731 | declarations.AddNewLine(); | ||
| 732 | } | 201 | } |
| 733 | } | 202 | for (const auto& gmem : ir.GetGlobalMemoryBases()) { |
| 734 | 203 | entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset, stage, | |
| 735 | /// Generates declarations for internal flags. | 204 | GetGlobalMemoryBlock(gmem)); |
| 736 | void GenerateInternalFlags() { | ||
| 737 | for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { | ||
| 738 | const InternalFlag code = static_cast<InternalFlag>(flag); | ||
| 739 | declarations.AddLine("bool " + GetInternalFlag(code) + " = false;"); | ||
| 740 | } | 205 | } |
| 741 | declarations.AddNewLine(); | 206 | entries.clip_distances = ir.GetClipDistances(); |
| 207 | entries.shader_length = ir.GetLength(); | ||
| 208 | return entries; | ||
| 742 | } | 209 | } |
| 743 | 210 | ||
| 744 | /// Generates declarations for input attributes. | 211 | private: |
| 745 | void GenerateInputAttrs() { | 212 | using OperationDecompilerFn = std::string (GLSLDecompiler::*)(Operation); |
| 746 | for (const auto element : declr_input_attribute) { | 213 | using OperationDecompilersArray = |
| 747 | // TODO(bunnei): Use proper number of elements for these | 214 | std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; |
| 748 | u32 idx = | ||
| 749 | static_cast<u32>(element.first) - static_cast<u32>(Attribute::Index::Attribute_0); | ||
| 750 | if (stage != Maxwell3D::Regs::ShaderStage::Vertex) { | ||
| 751 | // If inputs are varyings, add an offset | ||
| 752 | idx += GENERIC_VARYING_START_LOCATION; | ||
| 753 | } | ||
| 754 | |||
| 755 | std::string attr{GetInputAttribute(element.first, element.second)}; | ||
| 756 | if (stage == Maxwell3D::Regs::ShaderStage::Geometry) { | ||
| 757 | attr = "gs_" + attr + "[]"; | ||
| 758 | } | ||
| 759 | declarations.AddLine("layout (location = " + std::to_string(idx) + ") " + | ||
| 760 | GetInputFlags(element.first) + "in vec4 " + attr + ';'); | ||
| 761 | } | ||
| 762 | |||
| 763 | declarations.AddNewLine(); | ||
| 764 | } | ||
| 765 | 215 | ||
| 766 | /// Generates declarations for output attributes. | 216 | void DeclareVertex() { |
| 767 | void GenerateOutputAttrs() { | 217 | if (stage != ShaderStage::Vertex) |
| 768 | for (const auto& index : declr_output_attribute) { | 218 | return; |
| 769 | // TODO(bunnei): Use proper number of elements for these | ||
| 770 | const u32 idx = static_cast<u32>(index) - | ||
| 771 | static_cast<u32>(Attribute::Index::Attribute_0) + | ||
| 772 | GENERIC_VARYING_START_LOCATION; | ||
| 773 | declarations.AddLine("layout (location = " + std::to_string(idx) + ") out vec4 " + | ||
| 774 | GetOutputAttribute(index) + ';'); | ||
| 775 | } | ||
| 776 | declarations.AddNewLine(); | ||
| 777 | } | ||
| 778 | |||
| 779 | /// Generates declarations for constant buffers. | ||
| 780 | void GenerateConstBuffers() { | ||
| 781 | for (const auto& entry : GetConstBuffersDeclarations()) { | ||
| 782 | declarations.AddLine("layout (std140) uniform " + entry.GetName()); | ||
| 783 | declarations.AddLine('{'); | ||
| 784 | declarations.AddLine(" vec4 c" + std::to_string(entry.GetIndex()) + | ||
| 785 | "[MAX_CONSTBUFFER_ELEMENTS];"); | ||
| 786 | declarations.AddLine("};"); | ||
| 787 | declarations.AddNewLine(); | ||
| 788 | } | ||
| 789 | declarations.AddNewLine(); | ||
| 790 | } | ||
| 791 | 219 | ||
| 792 | /// Generates declarations for samplers. | 220 | DeclareVertexRedeclarations(); |
| 793 | void GenerateSamplers() { | ||
| 794 | const auto& samplers = GetSamplers(); | ||
| 795 | for (const auto& sampler : samplers) { | ||
| 796 | declarations.AddLine("uniform " + sampler.GetTypeString() + ' ' + sampler.GetName() + | ||
| 797 | ';'); | ||
| 798 | } | ||
| 799 | declarations.AddNewLine(); | ||
| 800 | } | 221 | } |
| 801 | 222 | ||
| 802 | /// Generates declarations used for geometry shaders. | 223 | void DeclareGeometry() { |
| 803 | void GenerateGeometry() { | 224 | if (stage != ShaderStage::Geometry) |
| 804 | if (stage != Maxwell3D::Regs::ShaderStage::Geometry) | ||
| 805 | return; | 225 | return; |
| 806 | 226 | ||
| 807 | declarations.AddLine( | 227 | const auto topology = GetTopologyName(header.common3.output_topology); |
| 808 | "layout (" + GetTopologyName(header.common3.output_topology) + | 228 | const auto max_vertices = std::to_string(header.common4.max_output_vertices); |
| 809 | ", max_vertices = " + std::to_string(header.common4.max_output_vertices) + ") out;"); | 229 | code.AddLine("layout (" + topology + ", max_vertices = " + max_vertices + ") out;"); |
| 810 | declarations.AddNewLine(); | 230 | code.AddNewLine(); |
| 811 | |||
| 812 | declarations.AddLine("vec4 amem[" + std::to_string(MAX_GEOMETRY_BUFFERS) + "][" + | ||
| 813 | std::to_string(MAX_ATTRIBUTES) + "];"); | ||
| 814 | declarations.AddNewLine(); | ||
| 815 | |||
| 816 | constexpr char buffer[] = "amem[output_buffer]"; | ||
| 817 | declarations.AddLine("void emit_vertex(uint output_buffer) {"); | ||
| 818 | ++declarations.scope; | ||
| 819 | for (const auto element : declr_output_attribute) { | ||
| 820 | declarations.AddLine(GetOutputAttribute(element) + " = " + buffer + '[' + | ||
| 821 | std::to_string(static_cast<u32>(element)) + "];"); | ||
| 822 | } | ||
| 823 | |||
| 824 | declarations.AddLine("position = " + std::string(buffer) + '[' + | ||
| 825 | std::to_string(static_cast<u32>(Attribute::Index::Position)) + "];"); | ||
| 826 | 231 | ||
| 827 | // If a geometry shader is attached, it will always flip (it's the last stage before | 232 | DeclareVertexRedeclarations(); |
| 828 | // fragment). For more info about flipping, refer to gl_shader_gen.cpp. | ||
| 829 | declarations.AddLine("position.xy *= viewport_flip.xy;"); | ||
| 830 | declarations.AddLine("gl_Position = position;"); | ||
| 831 | declarations.AddLine("position.w = 1.0;"); | ||
| 832 | declarations.AddLine("EmitVertex();"); | ||
| 833 | --declarations.scope; | ||
| 834 | declarations.AddLine('}'); | ||
| 835 | declarations.AddNewLine(); | ||
| 836 | } | 233 | } |
| 837 | 234 | ||
| 838 | void GenerateVertex() { | 235 | void DeclareVertexRedeclarations() { |
| 839 | if (stage != Maxwell3D::Regs::ShaderStage::Vertex) | ||
| 840 | return; | ||
| 841 | bool clip_distances_declared = false; | 236 | bool clip_distances_declared = false; |
| 842 | 237 | ||
| 843 | declarations.AddLine("out gl_PerVertex {"); | 238 | code.AddLine("out gl_PerVertex {"); |
| 844 | ++declarations.scope; | 239 | ++code.scope; |
| 845 | declarations.AddLine("vec4 gl_Position;"); | 240 | |
| 846 | for (auto& o : fixed_pipeline_output_attributes_used) { | 241 | code.AddLine("vec4 gl_Position;"); |
| 242 | |||
| 243 | for (const auto o : ir.GetOutputAttributes()) { | ||
| 847 | if (o == Attribute::Index::PointSize) | 244 | if (o == Attribute::Index::PointSize) |
| 848 | declarations.AddLine("float gl_PointSize;"); | 245 | code.AddLine("float gl_PointSize;"); |
| 849 | if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 || | 246 | if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 || |
| 850 | o == Attribute::Index::ClipDistances4567)) { | 247 | o == Attribute::Index::ClipDistances4567)) { |
| 851 | declarations.AddLine("float gl_ClipDistance[];"); | 248 | code.AddLine("float gl_ClipDistance[];"); |
| 852 | clip_distances_declared = true; | 249 | clip_distances_declared = true; |
| 853 | } | 250 | } |
| 854 | } | 251 | } |
| 855 | --declarations.scope; | ||
| 856 | declarations.AddLine("};"); | ||
| 857 | } | ||
| 858 | |||
| 859 | /// Generates code representing a temporary (GPR) register. | ||
| 860 | std::string GetRegister(const Register& reg, unsigned elem) { | ||
| 861 | if (reg == Register::ZeroIndex) { | ||
| 862 | return "0"; | ||
| 863 | } | ||
| 864 | 252 | ||
| 865 | return regs[reg.GetSwizzledIndex(elem)].GetString(); | 253 | --code.scope; |
| 866 | } | 254 | code.AddLine("};"); |
| 867 | 255 | code.AddNewLine(); | |
| 868 | /** | 256 | } |
| 869 | * Writes code that does a register assignment to value operation. | ||
| 870 | * @param reg The destination register to use. | ||
| 871 | * @param elem The element to use for the operation. | ||
| 872 | * @param value The code representing the value to assign. | ||
| 873 | * @param dest_num_components Number of components in the destination. | ||
| 874 | * @param value_num_components Number of components in the value. | ||
| 875 | * @param dest_elem Optional, the destination element to use for the operation. | ||
| 876 | */ | ||
| 877 | void SetRegister(const Register& reg, u64 elem, const std::string& value, | ||
| 878 | u64 dest_num_components, u64 value_num_components, u64 dest_elem, | ||
| 879 | bool precise) { | ||
| 880 | if (reg == Register::ZeroIndex) { | ||
| 881 | // Setting RZ is a nop in hardware. | ||
| 882 | return; | ||
| 883 | } | ||
| 884 | |||
| 885 | std::string dest = GetRegister(reg, static_cast<u32>(dest_elem)); | ||
| 886 | if (dest_num_components > 1) { | ||
| 887 | dest += GetSwizzle(elem); | ||
| 888 | } | ||
| 889 | |||
| 890 | std::string src = '(' + value + ')'; | ||
| 891 | if (value_num_components > 1) { | ||
| 892 | src += GetSwizzle(elem); | ||
| 893 | } | ||
| 894 | |||
| 895 | if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) { | ||
| 896 | const auto scope = shader.Scope(); | ||
| 897 | 257 | ||
| 898 | // This avoids optimizations of constant propagation and keeps the code as the original | 258 | void DeclareRegisters() { |
| 899 | // Sadly using the precise keyword causes "linking" errors on fragment shaders. | 259 | const auto& registers = ir.GetRegisters(); |
| 900 | shader.AddLine("precise float tmp = " + src + ';'); | 260 | for (const u32 gpr : registers) { |
| 901 | shader.AddLine(dest + " = tmp;"); | 261 | code.AddLine("float " + GetRegister(gpr) + " = 0;"); |
| 902 | } else { | ||
| 903 | shader.AddLine(dest + " = " + src + ';'); | ||
| 904 | } | 262 | } |
| 263 | if (!registers.empty()) | ||
| 264 | code.AddNewLine(); | ||
| 905 | } | 265 | } |
| 906 | 266 | ||
| 907 | /// Build the GLSL register list. | 267 | void DeclarePredicates() { |
| 908 | void BuildRegisterList() { | 268 | const auto& predicates = ir.GetPredicates(); |
| 909 | regs.reserve(Register::NumRegisters); | 269 | for (const auto pred : predicates) { |
| 910 | 270 | code.AddLine("bool " + GetPredicate(pred) + " = false;"); | |
| 911 | for (std::size_t index = 0; index < Register::NumRegisters; ++index) { | ||
| 912 | regs.emplace_back(index, suffix); | ||
| 913 | } | 271 | } |
| 272 | if (!predicates.empty()) | ||
| 273 | code.AddNewLine(); | ||
| 914 | } | 274 | } |
| 915 | 275 | ||
| 916 | void BuildInputList() { | 276 | void DeclareLocalMemory() { |
| 917 | const u32 size = static_cast<u32>(Attribute::Index::Attribute_31) - | 277 | if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) { |
| 918 | static_cast<u32>(Attribute::Index::Attribute_0) + 1; | 278 | const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; |
| 919 | declr_input_attribute.reserve(size); | 279 | code.AddLine("float " + GetLocalMemory() + '[' + std::to_string(element_count) + "];"); |
| 280 | code.AddNewLine(); | ||
| 281 | } | ||
| 920 | } | 282 | } |
| 921 | 283 | ||
| 922 | /// Generates code representing an input attribute register. | 284 | void DeclareInternalFlags() { |
| 923 | std::string GetInputAttribute(Attribute::Index attribute, | 285 | for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { |
| 924 | const Tegra::Shader::IpaMode& input_mode, | 286 | const InternalFlag flag_code = static_cast<InternalFlag>(flag); |
| 925 | std::optional<Register> vertex = {}) { | 287 | code.AddLine("bool " + GetInternalFlag(flag_code) + " = false;"); |
| 926 | auto GeometryPass = [&](const std::string& name) { | ||
| 927 | if (stage == Maxwell3D::Regs::ShaderStage::Geometry && vertex) { | ||
| 928 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games set | ||
| 929 | // an 0x80000000 index for those and the shader fails to build. Find out why this | ||
| 930 | // happens and what's its intent. | ||
| 931 | return "gs_" + name + '[' + GetRegisterAsInteger(*vertex, 0, false) + | ||
| 932 | " % MAX_VERTEX_INPUT]"; | ||
| 933 | } | ||
| 934 | return name; | ||
| 935 | }; | ||
| 936 | |||
| 937 | switch (attribute) { | ||
| 938 | case Attribute::Index::Position: | ||
| 939 | if (stage != Maxwell3D::Regs::ShaderStage::Fragment) { | ||
| 940 | return GeometryPass("position"); | ||
| 941 | } else { | ||
| 942 | return "vec4(gl_FragCoord.x, gl_FragCoord.y, gl_FragCoord.z, 1.0)"; | ||
| 943 | } | ||
| 944 | case Attribute::Index::PointCoord: | ||
| 945 | return "vec4(gl_PointCoord.x, gl_PointCoord.y, 0, 0)"; | ||
| 946 | case Attribute::Index::TessCoordInstanceIDVertexID: | ||
| 947 | // TODO(Subv): Find out what the values are for the first two elements when inside a | ||
| 948 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval | ||
| 949 | // shader. | ||
| 950 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex); | ||
| 951 | // Config pack's first value is instance_id. | ||
| 952 | return "vec4(0, 0, uintBitsToFloat(config_pack[0]), uintBitsToFloat(gl_VertexID))"; | ||
| 953 | case Attribute::Index::FrontFacing: | ||
| 954 | // TODO(Subv): Find out what the values are for the other elements. | ||
| 955 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); | ||
| 956 | return "vec4(0, 0, 0, intBitsToFloat(gl_FrontFacing ? -1 : 0))"; | ||
| 957 | default: | ||
| 958 | const u32 index{static_cast<u32>(attribute) - | ||
| 959 | static_cast<u32>(Attribute::Index::Attribute_0)}; | ||
| 960 | if (attribute >= Attribute::Index::Attribute_0 && | ||
| 961 | attribute <= Attribute::Index::Attribute_31) { | ||
| 962 | if (declr_input_attribute.count(attribute) == 0) { | ||
| 963 | declr_input_attribute[attribute] = input_mode; | ||
| 964 | } else { | ||
| 965 | UNIMPLEMENTED_IF_MSG(declr_input_attribute[attribute] != input_mode, | ||
| 966 | "Multiple input modes for the same attribute"); | ||
| 967 | } | ||
| 968 | return GeometryPass("input_attribute_" + std::to_string(index)); | ||
| 969 | } | ||
| 970 | |||
| 971 | UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); | ||
| 972 | } | 288 | } |
| 973 | 289 | code.AddNewLine(); | |
| 974 | return "vec4(0, 0, 0, 0)"; | ||
| 975 | } | 290 | } |
| 976 | 291 | ||
| 977 | std::string GetInputFlags(const Attribute::Index attribute) { | 292 | std::string GetInputFlags(const IpaMode& input_mode) { |
| 978 | const Tegra::Shader::IpaSampleMode sample_mode = | 293 | const IpaSampleMode sample_mode = input_mode.sampling_mode; |
| 979 | declr_input_attribute[attribute].sampling_mode; | 294 | const IpaInterpMode interp_mode = input_mode.interpolation_mode; |
| 980 | const Tegra::Shader::IpaInterpMode interp_mode = | ||
| 981 | declr_input_attribute[attribute].interpolation_mode; | ||
| 982 | std::string out; | 295 | std::string out; |
| 296 | |||
| 983 | switch (interp_mode) { | 297 | switch (interp_mode) { |
| 984 | case Tegra::Shader::IpaInterpMode::Flat: { | 298 | case IpaInterpMode::Flat: |
| 985 | out += "flat "; | 299 | out += "flat "; |
| 986 | break; | 300 | break; |
| 987 | } | 301 | case IpaInterpMode::Linear: |
| 988 | case Tegra::Shader::IpaInterpMode::Linear: { | ||
| 989 | out += "noperspective "; | 302 | out += "noperspective "; |
| 990 | break; | 303 | break; |
| 991 | } | 304 | case IpaInterpMode::Perspective: |
| 992 | case Tegra::Shader::IpaInterpMode::Perspective: { | ||
| 993 | // Default, Smooth | 305 | // Default, Smooth |
| 994 | break; | 306 | break; |
| 995 | } | 307 | default: |
| 996 | default: { | ||
| 997 | UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode)); | 308 | UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode)); |
| 998 | } | 309 | } |
| 999 | } | ||
| 1000 | switch (sample_mode) { | 310 | switch (sample_mode) { |
| 1001 | case Tegra::Shader::IpaSampleMode::Centroid: | 311 | case IpaSampleMode::Centroid: |
| 1002 | // It can be implemented with the "centroid " keyword in glsl | 312 | // It can be implemented with the "centroid " keyword in GLSL |
| 1003 | UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid"); | 313 | UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid"); |
| 1004 | break; | 314 | break; |
| 1005 | case Tegra::Shader::IpaSampleMode::Default: | 315 | case IpaSampleMode::Default: |
| 1006 | // Default, n/a | 316 | // Default, n/a |
| 1007 | break; | 317 | break; |
| 1008 | default: { | 318 | default: |
| 1009 | UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode)); | 319 | UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode)); |
| 1010 | break; | ||
| 1011 | } | ||
| 1012 | } | 320 | } |
| 1013 | return out; | 321 | return out; |
| 1014 | } | 322 | } |
| 1015 | 323 | ||
| 1016 | /// Generates code representing the declaration name of an output attribute register. | 324 | void DeclareInputAttributes() { |
| 1017 | std::string GetOutputAttribute(Attribute::Index attribute) { | 325 | const auto& attributes = ir.GetInputAttributes(); |
| 1018 | switch (attribute) { | 326 | for (const auto element : attributes) { |
| 1019 | case Attribute::Index::PointSize: | 327 | const Attribute::Index index = element.first; |
| 1020 | return "gl_PointSize"; | 328 | const IpaMode& input_mode = *element.second.begin(); |
| 1021 | case Attribute::Index::Position: | 329 | if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) { |
| 1022 | return "position"; | 330 | // Skip when it's not a generic attribute |
| 1023 | case Attribute::Index::ClipDistances0123: | 331 | continue; |
| 1024 | case Attribute::Index::ClipDistances4567: { | ||
| 1025 | return "gl_ClipDistance"; | ||
| 1026 | } | ||
| 1027 | default: | ||
| 1028 | const u32 index{static_cast<u32>(attribute) - | ||
| 1029 | static_cast<u32>(Attribute::Index::Attribute_0)}; | ||
| 1030 | if (attribute >= Attribute::Index::Attribute_0) { | ||
| 1031 | declr_output_attribute.insert(attribute); | ||
| 1032 | return "output_attribute_" + std::to_string(index); | ||
| 1033 | } | 332 | } |
| 1034 | 333 | ||
| 1035 | UNIMPLEMENTED_MSG("Unhandled output attribute={}", index); | 334 | ASSERT(element.second.size() > 0); |
| 1036 | return {}; | 335 | UNIMPLEMENTED_IF_MSG(element.second.size() > 1, |
| 1037 | } | 336 | "Multiple input flag modes are not supported in GLSL"); |
| 1038 | } | ||
| 1039 | |||
| 1040 | ShaderWriter& shader; | ||
| 1041 | ShaderWriter& declarations; | ||
| 1042 | std::vector<GLSLRegister> regs; | ||
| 1043 | std::unordered_map<Attribute::Index, Tegra::Shader::IpaMode> declr_input_attribute; | ||
| 1044 | std::set<Attribute::Index> declr_output_attribute; | ||
| 1045 | std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers; | ||
| 1046 | std::vector<SamplerEntry> used_samplers; | ||
| 1047 | const Maxwell3D::Regs::ShaderStage& stage; | ||
| 1048 | const std::string& suffix; | ||
| 1049 | const Tegra::Shader::Header& header; | ||
| 1050 | std::unordered_set<Attribute::Index> fixed_pipeline_output_attributes_used; | ||
| 1051 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | ||
| 1052 | u64 local_memory_size; | ||
| 1053 | }; | ||
| 1054 | |||
| 1055 | class GLSLGenerator { | ||
| 1056 | public: | ||
| 1057 | GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code, | ||
| 1058 | u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix, | ||
| 1059 | std::size_t shader_length) | ||
| 1060 | : subroutines(subroutines), program_code(program_code), main_offset(main_offset), | ||
| 1061 | stage(stage), suffix(suffix), shader_length(shader_length) { | ||
| 1062 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | ||
| 1063 | local_memory_size = header.GetLocalMemorySize(); | ||
| 1064 | regs.SetLocalMemory(local_memory_size); | ||
| 1065 | Generate(suffix); | ||
| 1066 | } | ||
| 1067 | |||
| 1068 | std::string GetShaderCode() { | ||
| 1069 | return declarations.GetResult() + shader.GetResult(); | ||
| 1070 | } | ||
| 1071 | |||
| 1072 | /// Returns entries in the shader that are useful for external functions | ||
| 1073 | ShaderEntries GetEntries() const { | ||
| 1074 | return {regs.GetConstBuffersDeclarations(), regs.GetSamplers(), regs.GetClipDistances(), | ||
| 1075 | shader_length}; | ||
| 1076 | } | ||
| 1077 | |||
| 1078 | private: | ||
| 1079 | /// Gets the Subroutine object corresponding to the specified address. | ||
| 1080 | const Subroutine& GetSubroutine(u32 begin, u32 end) const { | ||
| 1081 | const auto iter = subroutines.find(Subroutine{begin, end, suffix}); | ||
| 1082 | ASSERT(iter != subroutines.end()); | ||
| 1083 | return *iter; | ||
| 1084 | } | ||
| 1085 | |||
| 1086 | /// Generates code representing a 19-bit immediate value | ||
| 1087 | static std::string GetImmediate19(const Instruction& instr) { | ||
| 1088 | return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_19()); | ||
| 1089 | } | ||
| 1090 | 337 | ||
| 1091 | /// Generates code representing a 32-bit immediate value | 338 | // TODO(bunnei): Use proper number of elements for these |
| 1092 | static std::string GetImmediate32(const Instruction& instr) { | 339 | u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); |
| 1093 | return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_32()); | 340 | if (stage != ShaderStage::Vertex) { |
| 1094 | } | 341 | // If inputs are varyings, add an offset |
| 342 | idx += GENERIC_VARYING_START_LOCATION; | ||
| 343 | } | ||
| 1095 | 344 | ||
| 1096 | /// Generates code representing a vec2 pair unpacked from a half float immediate | 345 | std::string attr = GetInputAttribute(index); |
| 1097 | static std::string UnpackHalfImmediate(const Instruction& instr, bool negate) { | 346 | if (stage == ShaderStage::Geometry) { |
| 1098 | const std::string immediate = GetHalfFloat(std::to_string(instr.half_imm.PackImmediates())); | 347 | attr = "gs_" + attr + "[]"; |
| 1099 | if (!negate) { | 348 | } |
| 1100 | return immediate; | 349 | code.AddLine("layout (location = " + std::to_string(idx) + ") " + |
| 350 | GetInputFlags(input_mode) + "in vec4 " + attr + ';'); | ||
| 1101 | } | 351 | } |
| 1102 | const std::string negate_first = instr.half_imm.first_negate != 0 ? "-" : ""; | 352 | if (!attributes.empty()) |
| 1103 | const std::string negate_second = instr.half_imm.second_negate != 0 ? "-" : ""; | 353 | code.AddNewLine(); |
| 1104 | const std::string negate_vec = "vec2(" + negate_first + "1, " + negate_second + "1)"; | ||
| 1105 | |||
| 1106 | return '(' + immediate + " * " + negate_vec + ')'; | ||
| 1107 | } | ||
| 1108 | |||
| 1109 | /// Generates code representing a texture sampler. | ||
| 1110 | std::string GetSampler(const Sampler& sampler, Tegra::Shader::TextureType type, bool is_array, | ||
| 1111 | bool is_shadow) { | ||
| 1112 | return regs.AccessSampler(sampler, type, is_array, is_shadow); | ||
| 1113 | } | 354 | } |
| 1114 | 355 | ||
| 1115 | /** | 356 | void DeclareOutputAttributes() { |
| 1116 | * Adds code that calls a subroutine. | 357 | const auto& attributes = ir.GetOutputAttributes(); |
| 1117 | * @param subroutine the subroutine to call. | 358 | for (const auto index : attributes) { |
| 1118 | */ | 359 | if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) { |
| 1119 | void CallSubroutine(const Subroutine& subroutine) { | 360 | // Skip when it's not a generic attribute |
| 1120 | if (subroutine.exit_method == ExitMethod::AlwaysEnd) { | 361 | continue; |
| 1121 | shader.AddLine(subroutine.GetName() + "();"); | 362 | } |
| 1122 | shader.AddLine("return true;"); | 363 | // TODO(bunnei): Use proper number of elements for these |
| 1123 | } else if (subroutine.exit_method == ExitMethod::Conditional) { | 364 | const auto idx = static_cast<u32>(index) - |
| 1124 | shader.AddLine("if (" + subroutine.GetName() + "()) { return true; }"); | 365 | static_cast<u32>(Attribute::Index::Attribute_0) + |
| 1125 | } else { | 366 | GENERIC_VARYING_START_LOCATION; |
| 1126 | shader.AddLine(subroutine.GetName() + "();"); | 367 | code.AddLine("layout (location = " + std::to_string(idx) + ") out vec4 " + |
| 368 | GetOutputAttribute(index) + ';'); | ||
| 1127 | } | 369 | } |
| 370 | if (!attributes.empty()) | ||
| 371 | code.AddNewLine(); | ||
| 1128 | } | 372 | } |
| 1129 | 373 | ||
| 1130 | /* | 374 | void DeclareConstantBuffers() { |
| 1131 | * Writes code that assigns a predicate boolean variable. | 375 | for (const auto& entry : ir.GetConstantBuffers()) { |
| 1132 | * @param pred The id of the predicate to write to. | 376 | const auto [index, size] = entry; |
| 1133 | * @param value The expression value to assign to the predicate. | 377 | code.AddLine("layout (std140, binding = CBUF_BINDING_" + std::to_string(index) + |
| 1134 | */ | 378 | ") uniform " + GetConstBufferBlock(index) + " {"); |
| 1135 | void SetPredicate(u64 pred, const std::string& value) { | 379 | code.AddLine(" vec4 " + GetConstBuffer(index) + "[MAX_CONSTBUFFER_ELEMENTS];"); |
| 1136 | using Tegra::Shader::Pred; | 380 | code.AddLine("};"); |
| 1137 | // Can't assign to the constant predicate. | 381 | code.AddNewLine(); |
| 1138 | ASSERT(pred != static_cast<u64>(Pred::UnusedIndex)); | ||
| 1139 | |||
| 1140 | std::string variable = 'p' + std::to_string(pred) + '_' + suffix; | ||
| 1141 | shader.AddLine(variable + " = " + value + ';'); | ||
| 1142 | declr_predicates.insert(std::move(variable)); | ||
| 1143 | } | ||
| 1144 | |||
| 1145 | /* | ||
| 1146 | * Returns the condition to use in the 'if' for a predicated instruction. | ||
| 1147 | * @param instr Instruction to generate the if condition for. | ||
| 1148 | * @returns string containing the predicate condition. | ||
| 1149 | */ | ||
| 1150 | std::string GetPredicateCondition(u64 index, bool negate) { | ||
| 1151 | using Tegra::Shader::Pred; | ||
| 1152 | std::string variable; | ||
| 1153 | |||
| 1154 | // Index 7 is used as an 'Always True' condition. | ||
| 1155 | if (index == static_cast<u64>(Pred::UnusedIndex)) { | ||
| 1156 | variable = "true"; | ||
| 1157 | } else { | ||
| 1158 | variable = 'p' + std::to_string(index) + '_' + suffix; | ||
| 1159 | declr_predicates.insert(variable); | ||
| 1160 | } | ||
| 1161 | if (negate) { | ||
| 1162 | return "!(" + variable + ')'; | ||
| 1163 | } | ||
| 1164 | |||
| 1165 | return variable; | ||
| 1166 | } | ||
| 1167 | |||
| 1168 | /** | ||
| 1169 | * Returns the comparison string to use to compare two values in the 'set' family of | ||
| 1170 | * instructions. | ||
| 1171 | * @param condition The condition used in the 'set'-family instruction. | ||
| 1172 | * @param op_a First operand to use for the comparison. | ||
| 1173 | * @param op_b Second operand to use for the comparison. | ||
| 1174 | * @returns String corresponding to the GLSL operator that matches the desired comparison. | ||
| 1175 | */ | ||
| 1176 | std::string GetPredicateComparison(Tegra::Shader::PredCondition condition, | ||
| 1177 | const std::string& op_a, const std::string& op_b) const { | ||
| 1178 | using Tegra::Shader::PredCondition; | ||
| 1179 | static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = { | ||
| 1180 | {PredCondition::LessThan, "<"}, | ||
| 1181 | {PredCondition::Equal, "=="}, | ||
| 1182 | {PredCondition::LessEqual, "<="}, | ||
| 1183 | {PredCondition::GreaterThan, ">"}, | ||
| 1184 | {PredCondition::NotEqual, "!="}, | ||
| 1185 | {PredCondition::GreaterEqual, ">="}, | ||
| 1186 | {PredCondition::LessThanWithNan, "<"}, | ||
| 1187 | {PredCondition::NotEqualWithNan, "!="}, | ||
| 1188 | {PredCondition::LessEqualWithNan, "<="}, | ||
| 1189 | {PredCondition::GreaterThanWithNan, ">"}, | ||
| 1190 | {PredCondition::GreaterEqualWithNan, ">="}}; | ||
| 1191 | |||
| 1192 | const auto& comparison{PredicateComparisonStrings.find(condition)}; | ||
| 1193 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonStrings.end(), | ||
| 1194 | "Unknown predicate comparison operation"); | ||
| 1195 | |||
| 1196 | std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'}; | ||
| 1197 | if (condition == PredCondition::LessThanWithNan || | ||
| 1198 | condition == PredCondition::NotEqualWithNan || | ||
| 1199 | condition == PredCondition::LessEqualWithNan || | ||
| 1200 | condition == PredCondition::GreaterThanWithNan || | ||
| 1201 | condition == PredCondition::GreaterEqualWithNan) { | ||
| 1202 | predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')'; | ||
| 1203 | } | ||
| 1204 | |||
| 1205 | return predicate; | ||
| 1206 | } | ||
| 1207 | |||
| 1208 | /** | ||
| 1209 | * Returns the operator string to use to combine two predicates in the 'setp' family of | ||
| 1210 | * instructions. | ||
| 1211 | * @params operation The operator used in the 'setp'-family instruction. | ||
| 1212 | * @returns String corresponding to the GLSL operator that matches the desired operator. | ||
| 1213 | */ | ||
| 1214 | std::string GetPredicateCombiner(Tegra::Shader::PredOperation operation) const { | ||
| 1215 | using Tegra::Shader::PredOperation; | ||
| 1216 | static const std::unordered_map<PredOperation, const char*> PredicateOperationStrings = { | ||
| 1217 | {PredOperation::And, "&&"}, | ||
| 1218 | {PredOperation::Or, "||"}, | ||
| 1219 | {PredOperation::Xor, "^^"}, | ||
| 1220 | }; | ||
| 1221 | |||
| 1222 | auto op = PredicateOperationStrings.find(operation); | ||
| 1223 | UNIMPLEMENTED_IF_MSG(op == PredicateOperationStrings.end(), "Unknown predicate operation"); | ||
| 1224 | return op->second; | ||
| 1225 | } | ||
| 1226 | |||
| 1227 | /** | ||
| 1228 | * Transforms the input string GLSL operand into one that applies the abs() function and negates | ||
| 1229 | * the output if necessary. When both abs and neg are true, the negation will be applied after | ||
| 1230 | * taking the absolute value. | ||
| 1231 | * @param operand The input operand to take the abs() of, negate, or both. | ||
| 1232 | * @param abs Whether to apply the abs() function to the input operand. | ||
| 1233 | * @param neg Whether to negate the input operand. | ||
| 1234 | * @returns String corresponding to the operand after being transformed by the abs() and | ||
| 1235 | * negation operations. | ||
| 1236 | */ | ||
| 1237 | static std::string GetOperandAbsNeg(const std::string& operand, bool abs, bool neg) { | ||
| 1238 | std::string result = operand; | ||
| 1239 | |||
| 1240 | if (abs) { | ||
| 1241 | result = "abs(" + result + ')'; | ||
| 1242 | } | 382 | } |
| 1243 | |||
| 1244 | if (neg) { | ||
| 1245 | result = "-(" + result + ')'; | ||
| 1246 | } | ||
| 1247 | |||
| 1248 | return result; | ||
| 1249 | } | ||
| 1250 | |||
| 1251 | /* | ||
| 1252 | * Transforms the input string GLSL operand into an unpacked half float pair. | ||
| 1253 | * @note This function returns a float type pair instead of a half float pair. This is because | ||
| 1254 | * real half floats are not standardized in GLSL but unpackHalf2x16 (which returns a vec2) is. | ||
| 1255 | * @param operand Input operand. It has to be an unsigned integer. | ||
| 1256 | * @param type How to unpack the unsigned integer to a half float pair. | ||
| 1257 | * @param abs Get the absolute value of unpacked half floats. | ||
| 1258 | * @param neg Get the negative value of unpacked half floats. | ||
| 1259 | * @returns String corresponding to a half float pair. | ||
| 1260 | */ | ||
| 1261 | static std::string GetHalfFloat(const std::string& operand, | ||
| 1262 | Tegra::Shader::HalfType type = Tegra::Shader::HalfType::H0_H1, | ||
| 1263 | bool abs = false, bool neg = false) { | ||
| 1264 | // "vec2" calls emitted in this function are intended to alias components. | ||
| 1265 | const std::string value = [&]() { | ||
| 1266 | switch (type) { | ||
| 1267 | case Tegra::Shader::HalfType::H0_H1: | ||
| 1268 | return "unpackHalf2x16(" + operand + ')'; | ||
| 1269 | case Tegra::Shader::HalfType::F32: | ||
| 1270 | return "vec2(uintBitsToFloat(" + operand + "))"; | ||
| 1271 | case Tegra::Shader::HalfType::H0_H0: | ||
| 1272 | case Tegra::Shader::HalfType::H1_H1: { | ||
| 1273 | const bool high = type == Tegra::Shader::HalfType::H1_H1; | ||
| 1274 | const char unpack_index = "xy"[high ? 1 : 0]; | ||
| 1275 | return "vec2(unpackHalf2x16(" + operand + ")." + unpack_index + ')'; | ||
| 1276 | } | ||
| 1277 | default: | ||
| 1278 | UNREACHABLE(); | ||
| 1279 | return std::string("vec2(0)"); | ||
| 1280 | } | ||
| 1281 | }(); | ||
| 1282 | |||
| 1283 | return GetOperandAbsNeg(value, abs, neg); | ||
| 1284 | } | ||
| 1285 | |||
| 1286 | /* | ||
| 1287 | * Returns whether the instruction at the specified offset is a 'sched' instruction. | ||
| 1288 | * Sched instructions always appear before a sequence of 3 instructions. | ||
| 1289 | */ | ||
| 1290 | bool IsSchedInstruction(u32 offset) const { | ||
| 1291 | // sched instructions appear once every 4 instructions. | ||
| 1292 | static constexpr std::size_t SchedPeriod = 4; | ||
| 1293 | u32 absolute_offset = offset - main_offset; | ||
| 1294 | |||
| 1295 | return (absolute_offset % SchedPeriod) == 0; | ||
| 1296 | } | 383 | } |
| 1297 | 384 | ||
| 1298 | void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a, | 385 | void DeclareGlobalMemory() { |
| 1299 | const std::string& op_b, | 386 | for (const auto& entry : ir.GetGlobalMemoryBases()) { |
| 1300 | Tegra::Shader::PredicateResultMode predicate_mode, | 387 | const std::string binding = |
| 1301 | Tegra::Shader::Pred predicate, const bool set_cc) { | 388 | fmt::format("GMEM_BINDING_{}_{}", entry.cbuf_index, entry.cbuf_offset); |
| 1302 | std::string result{}; | 389 | code.AddLine("layout (std430, binding = " + binding + ") buffer " + |
| 1303 | switch (logic_op) { | 390 | GetGlobalMemoryBlock(entry) + " {"); |
| 1304 | case LogicOperation::And: { | 391 | code.AddLine(" float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];"); |
| 1305 | result = '(' + op_a + " & " + op_b + ')'; | 392 | code.AddLine("};"); |
| 1306 | break; | 393 | code.AddNewLine(); |
| 1307 | } | ||
| 1308 | case LogicOperation::Or: { | ||
| 1309 | result = '(' + op_a + " | " + op_b + ')'; | ||
| 1310 | break; | ||
| 1311 | } | ||
| 1312 | case LogicOperation::Xor: { | ||
| 1313 | result = '(' + op_a + " ^ " + op_b + ')'; | ||
| 1314 | break; | ||
| 1315 | } | ||
| 1316 | case LogicOperation::PassB: { | ||
| 1317 | result = op_b; | ||
| 1318 | break; | ||
| 1319 | } | ||
| 1320 | default: | ||
| 1321 | UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(logic_op)); | ||
| 1322 | } | 394 | } |
| 395 | } | ||
| 1323 | 396 | ||
| 1324 | if (dest != Tegra::Shader::Register::ZeroIndex) { | 397 | void DeclareSamplers() { |
| 1325 | regs.SetRegisterToInteger(dest, true, 0, result, 1, 1, false, set_cc); | 398 | const auto& samplers = ir.GetSamplers(); |
| 1326 | } | 399 | for (const auto& sampler : samplers) { |
| 400 | std::string sampler_type = [&]() { | ||
| 401 | switch (sampler.GetType()) { | ||
| 402 | case Tegra::Shader::TextureType::Texture1D: | ||
| 403 | return "sampler1D"; | ||
| 404 | case Tegra::Shader::TextureType::Texture2D: | ||
| 405 | return "sampler2D"; | ||
| 406 | case Tegra::Shader::TextureType::Texture3D: | ||
| 407 | return "sampler3D"; | ||
| 408 | case Tegra::Shader::TextureType::TextureCube: | ||
| 409 | return "samplerCube"; | ||
| 410 | default: | ||
| 411 | UNREACHABLE(); | ||
| 412 | return "sampler2D"; | ||
| 413 | } | ||
| 414 | }(); | ||
| 415 | if (sampler.IsArray()) | ||
| 416 | sampler_type += "Array"; | ||
| 417 | if (sampler.IsShadow()) | ||
| 418 | sampler_type += "Shadow"; | ||
| 1327 | 419 | ||
| 1328 | using Tegra::Shader::PredicateResultMode; | 420 | code.AddLine("layout (binding = SAMPLER_BINDING_" + std::to_string(sampler.GetIndex()) + |
| 1329 | // Write the predicate value depending on the predicate mode. | 421 | ") uniform " + sampler_type + ' ' + GetSampler(sampler) + ';'); |
| 1330 | switch (predicate_mode) { | ||
| 1331 | case PredicateResultMode::None: | ||
| 1332 | // Do nothing. | ||
| 1333 | return; | ||
| 1334 | case PredicateResultMode::NotZero: | ||
| 1335 | // Set the predicate to true if the result is not zero. | ||
| 1336 | SetPredicate(static_cast<u64>(predicate), '(' + result + ") != 0"); | ||
| 1337 | break; | ||
| 1338 | default: | ||
| 1339 | UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", | ||
| 1340 | static_cast<u32>(predicate_mode)); | ||
| 1341 | } | 422 | } |
| 423 | if (!samplers.empty()) | ||
| 424 | code.AddNewLine(); | ||
| 1342 | } | 425 | } |
| 1343 | 426 | ||
| 1344 | void WriteLop3Instruction(Register dest, const std::string& op_a, const std::string& op_b, | 427 | void VisitBasicBlock(const BasicBlock& bb) { |
| 1345 | const std::string& op_c, const std::string& imm_lut, | 428 | for (const Node node : bb) { |
| 1346 | const bool set_cc) { | 429 | if (const std::string expr = Visit(node); !expr.empty()) { |
| 1347 | if (dest == Tegra::Shader::Register::ZeroIndex) { | 430 | code.AddLine(expr); |
| 1348 | return; | 431 | } |
| 1349 | } | 432 | } |
| 433 | } | ||
| 1350 | 434 | ||
| 1351 | static constexpr std::array<const char*, 32> shift_amounts = { | 435 | std::string Visit(Node node) { |
| 1352 | "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", | 436 | if (const auto operation = std::get_if<OperationNode>(node)) { |
| 1353 | "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", | 437 | const auto operation_index = static_cast<std::size_t>(operation->GetCode()); |
| 1354 | "22", "23", "24", "25", "26", "27", "28", "29", "30", "31"}; | 438 | const auto decompiler = operation_decompilers[operation_index]; |
| 1355 | 439 | if (decompiler == nullptr) { | |
| 1356 | std::string result; | 440 | UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index); |
| 1357 | result += '('; | 441 | } |
| 1358 | 442 | return (this->*decompiler)(*operation); | |
| 1359 | for (std::size_t i = 0; i < shift_amounts.size(); ++i) { | ||
| 1360 | if (i) | ||
| 1361 | result += '|'; | ||
| 1362 | result += "(((" + imm_lut + " >> (((" + op_c + " >> " + shift_amounts[i] + | ||
| 1363 | ") & 1) | ((" + op_b + " >> " + shift_amounts[i] + ") & 1) << 1 | ((" + op_a + | ||
| 1364 | " >> " + shift_amounts[i] + ") & 1) << 2)) & 1) << " + shift_amounts[i] + ")"; | ||
| 1365 | } | ||
| 1366 | 443 | ||
| 1367 | result += ')'; | 444 | } else if (const auto gpr = std::get_if<GprNode>(node)) { |
| 445 | const u32 index = gpr->GetIndex(); | ||
| 446 | if (index == Register::ZeroIndex) { | ||
| 447 | return "0"; | ||
| 448 | } | ||
| 449 | return GetRegister(index); | ||
| 1368 | 450 | ||
| 1369 | regs.SetRegisterToInteger(dest, true, 0, result, 1, 1, false, set_cc); | 451 | } else if (const auto immediate = std::get_if<ImmediateNode>(node)) { |
| 1370 | } | 452 | const u32 value = immediate->GetValue(); |
| 453 | if (value < 10) { | ||
| 454 | // For eyecandy avoid using hex numbers on single digits | ||
| 455 | return fmt::format("utof({}u)", immediate->GetValue()); | ||
| 456 | } | ||
| 457 | return fmt::format("utof(0x{:x}u)", immediate->GetValue()); | ||
| 1371 | 458 | ||
| 1372 | void WriteTexsInstructionFloat(const Instruction& instr, const std::string& texture) { | 459 | } else if (const auto predicate = std::get_if<PredicateNode>(node)) { |
| 1373 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle | 460 | const auto value = [&]() -> std::string { |
| 1374 | // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 | 461 | switch (const auto index = predicate->GetIndex(); index) { |
| 462 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 463 | return "true"; | ||
| 464 | case Tegra::Shader::Pred::NeverExecute: | ||
| 465 | return "false"; | ||
| 466 | default: | ||
| 467 | return GetPredicate(index); | ||
| 468 | } | ||
| 469 | }(); | ||
| 470 | if (predicate->IsNegated()) { | ||
| 471 | return "!(" + value + ')'; | ||
| 472 | } | ||
| 473 | return value; | ||
| 1375 | 474 | ||
| 1376 | std::size_t written_components = 0; | 475 | } else if (const auto abuf = std::get_if<AbufNode>(node)) { |
| 1377 | for (u32 component = 0; component < 4; ++component) { | 476 | const auto attribute = abuf->GetIndex(); |
| 1378 | if (!instr.texs.IsComponentEnabled(component)) { | 477 | const auto element = abuf->GetElement(); |
| 1379 | continue; | 478 | |
| 479 | const auto GeometryPass = [&](const std::string& name) { | ||
| 480 | if (stage == ShaderStage::Geometry && abuf->GetBuffer()) { | ||
| 481 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games | ||
| 482 | // set an 0x80000000 index for those and the shader fails to build. Find out why | ||
| 483 | // this happens and what's its intent. | ||
| 484 | return "gs_" + name + "[ftou(" + Visit(abuf->GetBuffer()) + | ||
| 485 | ") % MAX_VERTEX_INPUT]"; | ||
| 486 | } | ||
| 487 | return name; | ||
| 488 | }; | ||
| 489 | |||
| 490 | switch (attribute) { | ||
| 491 | case Attribute::Index::Position: | ||
| 492 | if (stage != ShaderStage::Fragment) { | ||
| 493 | return GeometryPass("position") + GetSwizzle(element); | ||
| 494 | } else { | ||
| 495 | return element == 3 ? "1.0f" : "gl_FragCoord" + GetSwizzle(element); | ||
| 496 | } | ||
| 497 | case Attribute::Index::PointCoord: | ||
| 498 | switch (element) { | ||
| 499 | case 0: | ||
| 500 | return "gl_PointCoord.x"; | ||
| 501 | case 1: | ||
| 502 | return "gl_PointCoord.y"; | ||
| 503 | case 2: | ||
| 504 | case 3: | ||
| 505 | return "0"; | ||
| 506 | } | ||
| 507 | UNREACHABLE(); | ||
| 508 | return "0"; | ||
| 509 | case Attribute::Index::TessCoordInstanceIDVertexID: | ||
| 510 | // TODO(Subv): Find out what the values are for the first two elements when inside a | ||
| 511 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval | ||
| 512 | // shader. | ||
| 513 | ASSERT(stage == ShaderStage::Vertex); | ||
| 514 | switch (element) { | ||
| 515 | case 2: | ||
| 516 | // Config pack's first value is instance_id. | ||
| 517 | return "uintBitsToFloat(config_pack[0])"; | ||
| 518 | case 3: | ||
| 519 | return "uintBitsToFloat(gl_VertexID)"; | ||
| 520 | } | ||
| 521 | UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); | ||
| 522 | return "0"; | ||
| 523 | case Attribute::Index::FrontFacing: | ||
| 524 | // TODO(Subv): Find out what the values are for the other elements. | ||
| 525 | ASSERT(stage == ShaderStage::Fragment); | ||
| 526 | switch (element) { | ||
| 527 | case 3: | ||
| 528 | return "itof(gl_FrontFacing ? -1 : 0)"; | ||
| 529 | } | ||
| 530 | UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); | ||
| 531 | return "0"; | ||
| 532 | default: | ||
| 533 | if (attribute >= Attribute::Index::Attribute_0 && | ||
| 534 | attribute <= Attribute::Index::Attribute_31) { | ||
| 535 | return GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element); | ||
| 536 | } | ||
| 537 | break; | ||
| 1380 | } | 538 | } |
| 539 | UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); | ||
| 540 | |||
| 541 | } else if (const auto cbuf = std::get_if<CbufNode>(node)) { | ||
| 542 | const Node offset = cbuf->GetOffset(); | ||
| 543 | if (const auto immediate = std::get_if<ImmediateNode>(offset)) { | ||
| 544 | // Direct access | ||
| 545 | const u32 offset_imm = immediate->GetValue(); | ||
| 546 | ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); | ||
| 547 | return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), | ||
| 548 | offset_imm / (4 * 4), (offset_imm / 4) % 4); | ||
| 549 | |||
| 550 | } else if (std::holds_alternative<OperationNode>(*offset)) { | ||
| 551 | // Indirect access | ||
| 552 | const std::string final_offset = code.GenerateTemporal(); | ||
| 553 | code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4) & " + | ||
| 554 | std::to_string(MAX_CONSTBUFFER_ELEMENTS - 1) + ';'); | ||
| 555 | return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()), | ||
| 556 | final_offset, final_offset); | ||
| 1381 | 557 | ||
| 1382 | if (written_components < 2) { | ||
| 1383 | // Write the first two swizzle components to gpr0 and gpr0+1 | ||
| 1384 | regs.SetRegisterToFloat(instr.gpr0, component, texture, 1, 4, false, false, | ||
| 1385 | written_components % 2); | ||
| 1386 | } else { | 558 | } else { |
| 1387 | ASSERT(instr.texs.HasTwoDestinations()); | 559 | UNREACHABLE_MSG("Unmanaged offset node type"); |
| 1388 | // Write the rest of the swizzle components to gpr28 and gpr28+1 | ||
| 1389 | regs.SetRegisterToFloat(instr.gpr28, component, texture, 1, 4, false, false, | ||
| 1390 | written_components % 2); | ||
| 1391 | } | 560 | } |
| 1392 | 561 | ||
| 1393 | ++written_components; | 562 | } else if (const auto gmem = std::get_if<GmemNode>(node)) { |
| 1394 | } | 563 | const std::string real = Visit(gmem->GetRealAddress()); |
| 1395 | } | 564 | const std::string base = Visit(gmem->GetBaseAddress()); |
| 565 | const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4"; | ||
| 566 | return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); | ||
| 1396 | 567 | ||
| 1397 | void WriteTexsInstructionHalfFloat(const Instruction& instr, const std::string& texture) { | 568 | } else if (const auto lmem = std::get_if<LmemNode>(node)) { |
| 1398 | // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half | 569 | return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); |
| 1399 | // float instruction). | ||
| 1400 | 570 | ||
| 1401 | std::array<std::string, 4> components; | 571 | } else if (const auto internal_flag = std::get_if<InternalFlagNode>(node)) { |
| 1402 | u32 written_components = 0; | 572 | return GetInternalFlag(internal_flag->GetFlag()); |
| 1403 | 573 | ||
| 1404 | for (u32 component = 0; component < 4; ++component) { | 574 | } else if (const auto conditional = std::get_if<ConditionalNode>(node)) { |
| 1405 | if (!instr.texs.IsComponentEnabled(component)) | 575 | // It's invalid to call conditional on nested nodes, use an operation instead |
| 1406 | continue; | 576 | code.AddLine("if (" + Visit(conditional->GetCondition()) + ") {"); |
| 1407 | components[written_components++] = texture + GetSwizzle(component); | 577 | ++code.scope; |
| 1408 | } | ||
| 1409 | if (written_components == 0) | ||
| 1410 | return; | ||
| 1411 | 578 | ||
| 1412 | const auto BuildComponent = [&](std::string low, std::string high, bool high_enabled) { | 579 | VisitBasicBlock(conditional->GetCode()); |
| 1413 | return "vec2(" + low + ", " + (high_enabled ? high : "0") + ')'; | ||
| 1414 | }; | ||
| 1415 | 580 | ||
| 1416 | regs.SetRegisterToHalfFloat( | 581 | --code.scope; |
| 1417 | instr.gpr0, 0, BuildComponent(components[0], components[1], written_components > 1), | 582 | code.AddLine('}'); |
| 1418 | Tegra::Shader::HalfMerge::H0_H1, 1, 1); | 583 | return {}; |
| 1419 | 584 | ||
| 1420 | if (written_components > 2) { | 585 | } else if (const auto comment = std::get_if<CommentNode>(node)) { |
| 1421 | ASSERT(instr.texs.HasTwoDestinations()); | 586 | return "// " + comment->GetText(); |
| 1422 | regs.SetRegisterToHalfFloat( | ||
| 1423 | instr.gpr28, 0, | ||
| 1424 | BuildComponent(components[2], components[3], written_components > 3), | ||
| 1425 | Tegra::Shader::HalfMerge::H0_H1, 1, 1); | ||
| 1426 | } | 587 | } |
| 588 | UNREACHABLE(); | ||
| 589 | return {}; | ||
| 1427 | } | 590 | } |
| 1428 | 591 | ||
| 1429 | static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) { | 592 | std::string ApplyPrecise(Operation operation, const std::string& value) { |
| 1430 | switch (texture_type) { | 593 | if (!IsPrecise(operation)) { |
| 1431 | case Tegra::Shader::TextureType::Texture1D: | 594 | return value; |
| 1432 | return 1; | ||
| 1433 | case Tegra::Shader::TextureType::Texture2D: | ||
| 1434 | return 2; | ||
| 1435 | case Tegra::Shader::TextureType::Texture3D: | ||
| 1436 | case Tegra::Shader::TextureType::TextureCube: | ||
| 1437 | return 3; | ||
| 1438 | default: | ||
| 1439 | UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type)); | ||
| 1440 | return 0; | ||
| 1441 | } | 595 | } |
| 1442 | } | 596 | // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders |
| 597 | const std::string precise = stage != ShaderStage::Fragment ? "precise " : ""; | ||
| 1443 | 598 | ||
| 1444 | /* | 599 | const std::string temporal = code.GenerateTemporal(); |
| 1445 | * Emits code to push the input target address to the flow address stack, incrementing the stack | 600 | code.AddLine(precise + "float " + temporal + " = " + value + ';'); |
| 1446 | * top. | 601 | return temporal; |
| 1447 | */ | ||
| 1448 | void EmitPushToFlowStack(u32 target) { | ||
| 1449 | const auto scope = shader.Scope(); | ||
| 1450 | |||
| 1451 | shader.AddLine("flow_stack[flow_stack_top] = " + std::to_string(target) + "u;"); | ||
| 1452 | shader.AddLine("flow_stack_top++;"); | ||
| 1453 | } | 602 | } |
| 1454 | 603 | ||
| 1455 | /* | 604 | std::string VisitOperand(Operation operation, std::size_t operand_index) { |
| 1456 | * Emits code to pop an address from the flow address stack, setting the jump address to the | 605 | const auto& operand = operation[operand_index]; |
| 1457 | * popped address and decrementing the stack top. | 606 | const bool parent_precise = IsPrecise(operation); |
| 1458 | */ | 607 | const bool child_precise = IsPrecise(operand); |
| 1459 | void EmitPopFromFlowStack() { | 608 | const bool child_trivial = !std::holds_alternative<OperationNode>(*operand); |
| 1460 | const auto scope = shader.Scope(); | 609 | if (!parent_precise || child_precise || child_trivial) { |
| 610 | return Visit(operand); | ||
| 611 | } | ||
| 1461 | 612 | ||
| 1462 | shader.AddLine("flow_stack_top--;"); | 613 | const std::string temporal = code.GenerateTemporal(); |
| 1463 | shader.AddLine("jmp_to = flow_stack[flow_stack_top];"); | 614 | code.AddLine("float " + temporal + " = " + Visit(operand) + ';'); |
| 1464 | shader.AddLine("break;"); | 615 | return temporal; |
| 1465 | } | 616 | } |
| 1466 | 617 | ||
| 1467 | /// Writes the output values from a fragment shader to the corresponding GLSL output variables. | 618 | std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { |
| 1468 | void EmitFragmentOutputsWrite() { | 619 | std::string value = VisitOperand(operation, operand_index); |
| 1469 | ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); | ||
| 1470 | 620 | ||
| 1471 | UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Samplemask write is unimplemented"); | 621 | switch (type) { |
| 1472 | 622 | case Type::Bool: | |
| 1473 | shader.AddLine("if (alpha_test[0] != 0) {"); | 623 | case Type::Bool2: |
| 1474 | ++shader.scope; | 624 | case Type::Float: |
| 1475 | // We start on the register containing the alpha value in the first RT. | 625 | return value; |
| 1476 | u32 current_reg = 3; | 626 | case Type::Int: |
| 1477 | for (u32 render_target = 0; render_target < Maxwell3D::Regs::NumRenderTargets; | 627 | return "ftoi(" + value + ')'; |
| 1478 | ++render_target) { | 628 | case Type::Uint: |
| 1479 | // TODO(Blinkhawk): verify the behavior of alpha testing on hardware when | 629 | return "ftou(" + value + ')'; |
| 1480 | // multiple render targets are used. | 630 | case Type::HalfFloat: |
| 1481 | if (header.ps.IsColorComponentOutputEnabled(render_target, 0) || | 631 | const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta()); |
| 1482 | header.ps.IsColorComponentOutputEnabled(render_target, 1) || | 632 | if (!half_meta) { |
| 1483 | header.ps.IsColorComponentOutputEnabled(render_target, 2) || | 633 | value = "toHalf2(" + value + ')'; |
| 1484 | header.ps.IsColorComponentOutputEnabled(render_target, 3)) { | ||
| 1485 | shader.AddLine(fmt::format("if (!AlphaFunc({})) discard;", | ||
| 1486 | regs.GetRegisterAsFloat(current_reg))); | ||
| 1487 | current_reg += 4; | ||
| 1488 | } | 634 | } |
| 1489 | } | ||
| 1490 | --shader.scope; | ||
| 1491 | shader.AddLine('}'); | ||
| 1492 | 635 | ||
| 1493 | // Write the color outputs using the data in the shader registers, disabled | 636 | switch (half_meta->types.at(operand_index)) { |
| 1494 | // rendertargets/components are skipped in the register assignment. | 637 | case Tegra::Shader::HalfType::H0_H1: |
| 1495 | current_reg = 0; | 638 | return "toHalf2(" + value + ')'; |
| 1496 | for (u32 render_target = 0; render_target < Maxwell3D::Regs::NumRenderTargets; | 639 | case Tegra::Shader::HalfType::F32: |
| 1497 | ++render_target) { | 640 | return "vec2(" + value + ')'; |
| 1498 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. | 641 | case Tegra::Shader::HalfType::H0_H0: |
| 1499 | for (u32 component = 0; component < 4; ++component) { | 642 | return "vec2(toHalf2(" + value + ")[0])"; |
| 1500 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { | 643 | case Tegra::Shader::HalfType::H1_H1: |
| 1501 | shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component, | 644 | return "vec2(toHalf2(" + value + ")[1])"; |
| 1502 | regs.GetRegisterAsFloat(current_reg))); | ||
| 1503 | ++current_reg; | ||
| 1504 | } | ||
| 1505 | } | 645 | } |
| 1506 | } | 646 | } |
| 1507 | 647 | UNREACHABLE(); | |
| 1508 | if (header.ps.omap.depth) { | 648 | return value; |
| 1509 | // The depth output is always 2 registers after the last color output, and current_reg | ||
| 1510 | // already contains one past the last color register. | ||
| 1511 | |||
| 1512 | shader.AddLine( | ||
| 1513 | "gl_FragDepth = " + | ||
| 1514 | regs.GetRegisterAsFloat(static_cast<Tegra::Shader::Register>(current_reg) + 1) + | ||
| 1515 | ';'); | ||
| 1516 | } | ||
| 1517 | } | 649 | } |
| 1518 | 650 | ||
| 1519 | /// Unpacks a video instruction operand (e.g. VMAD). | 651 | std::string BitwiseCastResult(std::string value, Type type, bool needs_parenthesis = false) { |
| 1520 | std::string GetVideoOperand(const std::string& op, bool is_chunk, bool is_signed, | 652 | switch (type) { |
| 1521 | Tegra::Shader::VideoType type, u64 byte_height) { | 653 | case Type::Bool: |
| 1522 | const std::string value = [&]() { | 654 | case Type::Float: |
| 1523 | if (!is_chunk) { | 655 | if (needs_parenthesis) { |
| 1524 | const auto offset = static_cast<u32>(byte_height * 8); | 656 | return '(' + value + ')'; |
| 1525 | return "((" + op + " >> " + std::to_string(offset) + ") & 0xff)"; | ||
| 1526 | } | ||
| 1527 | const std::string zero = "0"; | ||
| 1528 | |||
| 1529 | switch (type) { | ||
| 1530 | case Tegra::Shader::VideoType::Size16_Low: | ||
| 1531 | return '(' + op + " & 0xffff)"; | ||
| 1532 | case Tegra::Shader::VideoType::Size16_High: | ||
| 1533 | return '(' + op + " >> 16)"; | ||
| 1534 | case Tegra::Shader::VideoType::Size32: | ||
| 1535 | // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when | ||
| 1536 | // this type is used (1 * 1 + 0 == 0x5b800000). Until a better | ||
| 1537 | // explanation is found: abort. | ||
| 1538 | UNIMPLEMENTED(); | ||
| 1539 | return zero; | ||
| 1540 | case Tegra::Shader::VideoType::Invalid: | ||
| 1541 | UNREACHABLE_MSG("Invalid instruction encoding"); | ||
| 1542 | return zero; | ||
| 1543 | default: | ||
| 1544 | UNREACHABLE(); | ||
| 1545 | return zero; | ||
| 1546 | } | 657 | } |
| 1547 | }(); | 658 | return value; |
| 1548 | 659 | case Type::Int: | |
| 1549 | if (is_signed) { | 660 | return "itof(" + value + ')'; |
| 1550 | return "int(" + value + ')'; | 661 | case Type::Uint: |
| 1551 | } | 662 | return "utof(" + value + ')'; |
| 663 | case Type::HalfFloat: | ||
| 664 | return "fromHalf2(" + value + ')'; | ||
| 665 | } | ||
| 666 | UNREACHABLE(); | ||
| 1552 | return value; | 667 | return value; |
| 1553 | }; | ||
| 1554 | |||
| 1555 | /// Gets the A operand for a video instruction. | ||
| 1556 | std::string GetVideoOperandA(Instruction instr) { | ||
| 1557 | return GetVideoOperand(regs.GetRegisterAsInteger(instr.gpr8, 0, false), | ||
| 1558 | instr.video.is_byte_chunk_a != 0, instr.video.signed_a, | ||
| 1559 | instr.video.type_a, instr.video.byte_height_a); | ||
| 1560 | } | 668 | } |
| 1561 | 669 | ||
| 1562 | /// Gets the B operand for a video instruction. | 670 | std::string GenerateUnary(Operation operation, const std::string& func, Type result_type, |
| 1563 | std::string GetVideoOperandB(Instruction instr) { | 671 | Type type_a, bool needs_parenthesis = true) { |
| 1564 | if (instr.video.use_register_b) { | 672 | return ApplyPrecise(operation, |
| 1565 | return GetVideoOperand(regs.GetRegisterAsInteger(instr.gpr20, 0, false), | 673 | BitwiseCastResult(func + '(' + VisitOperand(operation, 0, type_a) + ')', |
| 1566 | instr.video.is_byte_chunk_b != 0, instr.video.signed_b, | 674 | result_type, needs_parenthesis)); |
| 1567 | instr.video.type_b, instr.video.byte_height_b); | ||
| 1568 | } else { | ||
| 1569 | return '(' + | ||
| 1570 | std::to_string(instr.video.signed_b ? static_cast<s16>(instr.alu.GetImm20_16()) | ||
| 1571 | : instr.alu.GetImm20_16()) + | ||
| 1572 | ')'; | ||
| 1573 | } | ||
| 1574 | } | 675 | } |
| 1575 | 676 | ||
| 1576 | std::pair<size_t, std::string> ValidateAndGetCoordinateElement( | 677 | std::string GenerateBinaryInfix(Operation operation, const std::string& func, Type result_type, |
| 1577 | const Tegra::Shader::TextureType texture_type, const bool depth_compare, | 678 | Type type_a, Type type_b) { |
| 1578 | const bool is_array, const bool lod_bias_enabled, size_t max_coords, size_t max_inputs) { | 679 | const std::string op_a = VisitOperand(operation, 0, type_a); |
| 1579 | const size_t coord_count = TextureCoordinates(texture_type); | 680 | const std::string op_b = VisitOperand(operation, 1, type_b); |
| 1580 | |||
| 1581 | size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); | ||
| 1582 | const size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); | ||
| 1583 | if (total_coord_count > max_coords || total_reg_count > max_inputs) { | ||
| 1584 | UNIMPLEMENTED_MSG("Unsupported Texture operation"); | ||
| 1585 | total_coord_count = std::min(total_coord_count, max_coords); | ||
| 1586 | } | ||
| 1587 | // 1D.DC opengl is using a vec3 but 2nd component is ignored later. | ||
| 1588 | total_coord_count += | ||
| 1589 | (depth_compare && !is_array && texture_type == Tegra::Shader::TextureType::Texture1D) | ||
| 1590 | ? 1 | ||
| 1591 | : 0; | ||
| 1592 | |||
| 1593 | constexpr std::array<const char*, 5> coord_container{ | ||
| 1594 | {"", "float coord = (", "vec2 coord = vec2(", "vec3 coord = vec3(", | ||
| 1595 | "vec4 coord = vec4("}}; | ||
| 1596 | |||
| 1597 | return std::pair<size_t, std::string>(coord_count, coord_container[total_coord_count]); | ||
| 1598 | } | ||
| 1599 | |||
| 1600 | std::string GetTextureCode(const Tegra::Shader::Instruction& instr, | ||
| 1601 | const Tegra::Shader::TextureType texture_type, | ||
| 1602 | const Tegra::Shader::TextureProcessMode process_mode, | ||
| 1603 | const bool depth_compare, const bool is_array, | ||
| 1604 | const size_t bias_offset) { | ||
| 1605 | |||
| 1606 | if ((texture_type == Tegra::Shader::TextureType::Texture3D && | ||
| 1607 | (is_array || depth_compare)) || | ||
| 1608 | (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && | ||
| 1609 | depth_compare)) { | ||
| 1610 | UNIMPLEMENTED_MSG("This method is not supported."); | ||
| 1611 | } | ||
| 1612 | |||
| 1613 | const std::string sampler = | ||
| 1614 | GetSampler(instr.sampler, texture_type, is_array, depth_compare); | ||
| 1615 | |||
| 1616 | const bool lod_needed = process_mode == Tegra::Shader::TextureProcessMode::LZ || | ||
| 1617 | process_mode == Tegra::Shader::TextureProcessMode::LL || | ||
| 1618 | process_mode == Tegra::Shader::TextureProcessMode::LLA; | ||
| 1619 | 681 | ||
| 1620 | // LOD selection (either via bias or explicit textureLod) not supported in GL for | 682 | return ApplyPrecise( |
| 1621 | // sampler2DArrayShadow and samplerCubeArrayShadow. | 683 | operation, BitwiseCastResult('(' + op_a + ' ' + func + ' ' + op_b + ')', result_type)); |
| 1622 | const bool gl_lod_supported = !( | ||
| 1623 | (texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) || | ||
| 1624 | (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare)); | ||
| 1625 | |||
| 1626 | const std::string read_method = lod_needed && gl_lod_supported ? "textureLod(" : "texture("; | ||
| 1627 | std::string texture = read_method + sampler + ", coord"; | ||
| 1628 | |||
| 1629 | UNIMPLEMENTED_IF(process_mode != Tegra::Shader::TextureProcessMode::None && | ||
| 1630 | !gl_lod_supported); | ||
| 1631 | |||
| 1632 | if (process_mode != Tegra::Shader::TextureProcessMode::None && gl_lod_supported) { | ||
| 1633 | if (process_mode == Tegra::Shader::TextureProcessMode::LZ) { | ||
| 1634 | texture += ", 0.0"; | ||
| 1635 | } else { | ||
| 1636 | // If present, lod or bias are always stored in the register indexed by the | ||
| 1637 | // gpr20 | ||
| 1638 | // field with an offset depending on the usage of the other registers | ||
| 1639 | texture += ',' + regs.GetRegisterAsFloat(instr.gpr20.Value() + bias_offset); | ||
| 1640 | } | ||
| 1641 | } | ||
| 1642 | texture += ")"; | ||
| 1643 | return texture; | ||
| 1644 | } | ||
| 1645 | |||
| 1646 | std::pair<std::string, std::string> GetTEXCode( | ||
| 1647 | const Instruction& instr, const Tegra::Shader::TextureType texture_type, | ||
| 1648 | const Tegra::Shader::TextureProcessMode process_mode, const bool depth_compare, | ||
| 1649 | const bool is_array) { | ||
| 1650 | const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None && | ||
| 1651 | process_mode != Tegra::Shader::TextureProcessMode::LZ); | ||
| 1652 | |||
| 1653 | const auto [coord_count, coord_dcl] = ValidateAndGetCoordinateElement( | ||
| 1654 | texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); | ||
| 1655 | // If enabled arrays index is always stored in the gpr8 field | ||
| 1656 | const u64 array_register = instr.gpr8.Value(); | ||
| 1657 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used | ||
| 1658 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 1659 | |||
| 1660 | std::string coord = coord_dcl; | ||
| 1661 | for (size_t i = 0; i < coord_count;) { | ||
| 1662 | coord += regs.GetRegisterAsFloat(coord_register + i); | ||
| 1663 | ++i; | ||
| 1664 | if (i != coord_count) { | ||
| 1665 | coord += ','; | ||
| 1666 | } | ||
| 1667 | } | ||
| 1668 | // 1D.DC in opengl the 2nd component is ignored. | ||
| 1669 | if (depth_compare && !is_array && texture_type == Tegra::Shader::TextureType::Texture1D) { | ||
| 1670 | coord += ",0.0"; | ||
| 1671 | } | ||
| 1672 | if (is_array) { | ||
| 1673 | coord += ',' + regs.GetRegisterAsInteger(array_register); | ||
| 1674 | } | ||
| 1675 | if (depth_compare) { | ||
| 1676 | // Depth is always stored in the register signaled by gpr20 | ||
| 1677 | // or in the next register if lod or bias are used | ||
| 1678 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | ||
| 1679 | coord += ',' + regs.GetRegisterAsFloat(depth_register); | ||
| 1680 | } | ||
| 1681 | coord += ");"; | ||
| 1682 | return std::make_pair( | ||
| 1683 | coord, GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, 0)); | ||
| 1684 | } | ||
| 1685 | |||
| 1686 | std::pair<std::string, std::string> GetTEXSCode( | ||
| 1687 | const Instruction& instr, const Tegra::Shader::TextureType texture_type, | ||
| 1688 | const Tegra::Shader::TextureProcessMode process_mode, const bool depth_compare, | ||
| 1689 | const bool is_array) { | ||
| 1690 | const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None && | ||
| 1691 | process_mode != Tegra::Shader::TextureProcessMode::LZ); | ||
| 1692 | |||
| 1693 | const auto [coord_count, coord_dcl] = ValidateAndGetCoordinateElement( | ||
| 1694 | texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4); | ||
| 1695 | // If enabled arrays index is always stored in the gpr8 field | ||
| 1696 | const u64 array_register = instr.gpr8.Value(); | ||
| 1697 | // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used | ||
| 1698 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 1699 | const u64 last_coord_register = | ||
| 1700 | (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) | ||
| 1701 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 1702 | : coord_register + 1; | ||
| 1703 | |||
| 1704 | std::string coord = coord_dcl; | ||
| 1705 | for (size_t i = 0; i < coord_count; ++i) { | ||
| 1706 | const bool last = (i == (coord_count - 1)) && (coord_count > 1); | ||
| 1707 | coord += regs.GetRegisterAsFloat(last ? last_coord_register : coord_register + i); | ||
| 1708 | if (i < coord_count - 1) { | ||
| 1709 | coord += ','; | ||
| 1710 | } | ||
| 1711 | } | ||
| 1712 | |||
| 1713 | if (is_array) { | ||
| 1714 | coord += ',' + regs.GetRegisterAsInteger(array_register); | ||
| 1715 | } | ||
| 1716 | if (depth_compare) { | ||
| 1717 | // Depth is always stored in the register signaled by gpr20 | ||
| 1718 | // or in the next register if lod or bias are used | ||
| 1719 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | ||
| 1720 | coord += ',' + regs.GetRegisterAsFloat(depth_register); | ||
| 1721 | } | ||
| 1722 | coord += ");"; | ||
| 1723 | |||
| 1724 | return std::make_pair(coord, | ||
| 1725 | GetTextureCode(instr, texture_type, process_mode, depth_compare, | ||
| 1726 | is_array, (coord_count > 2 ? 1 : 0))); | ||
| 1727 | } | 684 | } |
| 1728 | 685 | ||
| 1729 | std::pair<std::string, std::string> GetTLD4Code(const Instruction& instr, | 686 | std::string GenerateBinaryCall(Operation operation, const std::string& func, Type result_type, |
| 1730 | const Tegra::Shader::TextureType texture_type, | 687 | Type type_a, Type type_b) { |
| 1731 | const bool depth_compare, const bool is_array) { | 688 | const std::string op_a = VisitOperand(operation, 0, type_a); |
| 1732 | 689 | const std::string op_b = VisitOperand(operation, 1, type_b); | |
| 1733 | const size_t coord_count = TextureCoordinates(texture_type); | ||
| 1734 | const size_t total_coord_count = coord_count + (is_array ? 1 : 0); | ||
| 1735 | const size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); | ||
| 1736 | |||
| 1737 | constexpr std::array<const char*, 5> coord_container{ | ||
| 1738 | {"", "", "vec2 coord = vec2(", "vec3 coord = vec3(", "vec4 coord = vec4("}}; | ||
| 1739 | |||
| 1740 | // If enabled arrays index is always stored in the gpr8 field | ||
| 1741 | const u64 array_register = instr.gpr8.Value(); | ||
| 1742 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used | ||
| 1743 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 1744 | 690 | ||
| 1745 | std::string coord = coord_container[total_coord_count]; | 691 | return ApplyPrecise(operation, |
| 1746 | for (size_t i = 0; i < coord_count;) { | 692 | BitwiseCastResult(func + '(' + op_a + ", " + op_b + ')', result_type)); |
| 1747 | coord += regs.GetRegisterAsFloat(coord_register + i); | ||
| 1748 | ++i; | ||
| 1749 | if (i != coord_count) { | ||
| 1750 | coord += ','; | ||
| 1751 | } | ||
| 1752 | } | ||
| 1753 | |||
| 1754 | if (is_array) { | ||
| 1755 | coord += ',' + regs.GetRegisterAsInteger(array_register); | ||
| 1756 | } | ||
| 1757 | coord += ");"; | ||
| 1758 | |||
| 1759 | const std::string sampler = | ||
| 1760 | GetSampler(instr.sampler, texture_type, is_array, depth_compare); | ||
| 1761 | |||
| 1762 | std::string texture = "textureGather(" + sampler + ", coord, "; | ||
| 1763 | if (depth_compare) { | ||
| 1764 | // Depth is always stored in the register signaled by gpr20 | ||
| 1765 | texture += regs.GetRegisterAsFloat(instr.gpr20.Value()) + ')'; | ||
| 1766 | } else { | ||
| 1767 | texture += std::to_string(instr.tld4.component) + ')'; | ||
| 1768 | } | ||
| 1769 | return std::make_pair(coord, texture); | ||
| 1770 | } | 693 | } |
| 1771 | 694 | ||
| 1772 | std::pair<std::string, std::string> GetTLDSCode(const Instruction& instr, | 695 | std::string GenerateTernary(Operation operation, const std::string& func, Type result_type, |
| 1773 | const Tegra::Shader::TextureType texture_type, | 696 | Type type_a, Type type_b, Type type_c) { |
| 1774 | const bool is_array) { | 697 | const std::string op_a = VisitOperand(operation, 0, type_a); |
| 698 | const std::string op_b = VisitOperand(operation, 1, type_b); | ||
| 699 | const std::string op_c = VisitOperand(operation, 2, type_c); | ||
| 1775 | 700 | ||
| 1776 | const size_t coord_count = TextureCoordinates(texture_type); | 701 | return ApplyPrecise( |
| 1777 | const size_t total_coord_count = coord_count + (is_array ? 1 : 0); | 702 | operation, |
| 1778 | const bool lod_enabled = | 703 | BitwiseCastResult(func + '(' + op_a + ", " + op_b + ", " + op_c + ')', result_type)); |
| 1779 | instr.tlds.GetTextureProcessMode() == Tegra::Shader::TextureProcessMode::LL; | 704 | } |
| 1780 | |||
| 1781 | constexpr std::array<const char*, 4> coord_container{ | ||
| 1782 | {"", "int coords = (", "ivec2 coords = ivec2(", "ivec3 coords = ivec3("}}; | ||
| 1783 | |||
| 1784 | std::string coord = coord_container[total_coord_count]; | ||
| 1785 | |||
| 1786 | // If enabled arrays index is always stored in the gpr8 field | ||
| 1787 | const u64 array_register = instr.gpr8.Value(); | ||
| 1788 | |||
| 1789 | // if is array gpr20 is used | ||
| 1790 | const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); | ||
| 1791 | |||
| 1792 | const u64 last_coord_register = | ||
| 1793 | ((coord_count > 2) || (coord_count == 2 && !lod_enabled)) && !is_array | ||
| 1794 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 1795 | : coord_register + 1; | ||
| 1796 | |||
| 1797 | for (size_t i = 0; i < coord_count; ++i) { | ||
| 1798 | const bool last = (i == (coord_count - 1)) && (coord_count > 1); | ||
| 1799 | coord += regs.GetRegisterAsInteger(last ? last_coord_register : coord_register + i); | ||
| 1800 | if (i < coord_count - 1) { | ||
| 1801 | coord += ','; | ||
| 1802 | } | ||
| 1803 | } | ||
| 1804 | if (is_array) { | ||
| 1805 | coord += ',' + regs.GetRegisterAsInteger(array_register); | ||
| 1806 | } | ||
| 1807 | coord += ");"; | ||
| 1808 | |||
| 1809 | const std::string sampler = GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 1810 | |||
| 1811 | std::string texture = "texelFetch(" + sampler + ", coords"; | ||
| 1812 | |||
| 1813 | if (lod_enabled) { | ||
| 1814 | // When lod is used always is in grp20 | ||
| 1815 | texture += ", " + regs.GetRegisterAsInteger(instr.gpr20) + ')'; | ||
| 1816 | } else { | ||
| 1817 | texture += ", 0)"; | ||
| 1818 | } | ||
| 1819 | return std::make_pair(coord, texture); | ||
| 1820 | } | ||
| 1821 | |||
| 1822 | /** | ||
| 1823 | * Compiles a single instruction from Tegra to GLSL. | ||
| 1824 | * @param offset the offset of the Tegra shader instruction. | ||
| 1825 | * @return the offset of the next instruction to execute. Usually it is the current offset | ||
| 1826 | * + 1. If the current instruction always terminates the program, returns PROGRAM_END. | ||
| 1827 | */ | ||
| 1828 | u32 CompileInstr(u32 offset) { | ||
| 1829 | // Ignore sched instructions when generating code. | ||
| 1830 | if (IsSchedInstruction(offset)) { | ||
| 1831 | return offset + 1; | ||
| 1832 | } | ||
| 1833 | |||
| 1834 | const Instruction instr = {program_code[offset]}; | ||
| 1835 | const auto opcode = OpCode::Decode(instr); | ||
| 1836 | |||
| 1837 | // Decoding failure | ||
| 1838 | if (!opcode) { | ||
| 1839 | UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); | ||
| 1840 | return offset + 1; | ||
| 1841 | } | ||
| 1842 | |||
| 1843 | shader.AddLine( | ||
| 1844 | fmt::format("// {}: {} (0x{:016x})", offset, opcode->get().GetName(), instr.value)); | ||
| 1845 | |||
| 1846 | using Tegra::Shader::Pred; | ||
| 1847 | UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, | ||
| 1848 | "NeverExecute predicate not implemented"); | ||
| 1849 | |||
| 1850 | // Some instructions (like SSY) don't have a predicate field, they are always | ||
| 1851 | // unconditionally executed. | ||
| 1852 | bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId()); | ||
| 1853 | 705 | ||
| 1854 | if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { | 706 | std::string GenerateQuaternary(Operation operation, const std::string& func, Type result_type, |
| 1855 | shader.AddLine("if (" + | 707 | Type type_a, Type type_b, Type type_c, Type type_d) { |
| 1856 | GetPredicateCondition(instr.pred.pred_index, instr.negate_pred != 0) + | 708 | const std::string op_a = VisitOperand(operation, 0, type_a); |
| 1857 | ')'); | 709 | const std::string op_b = VisitOperand(operation, 1, type_b); |
| 1858 | shader.AddLine('{'); | 710 | const std::string op_c = VisitOperand(operation, 2, type_c); |
| 1859 | ++shader.scope; | 711 | const std::string op_d = VisitOperand(operation, 3, type_d); |
| 1860 | } | ||
| 1861 | 712 | ||
| 1862 | switch (opcode->get().GetType()) { | 713 | return ApplyPrecise(operation, BitwiseCastResult(func + '(' + op_a + ", " + op_b + ", " + |
| 1863 | case OpCode::Type::Arithmetic: { | 714 | op_c + ", " + op_d + ')', |
| 1864 | std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); | 715 | result_type)); |
| 716 | } | ||
| 1865 | 717 | ||
| 1866 | std::string op_b; | 718 | std::string GenerateTexture(Operation operation, const std::string& func, |
| 719 | bool is_extra_int = false) { | ||
| 720 | constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; | ||
| 1867 | 721 | ||
| 1868 | if (instr.is_b_imm) { | 722 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1869 | op_b = GetImmediate19(instr); | 723 | const auto count = static_cast<u32>(operation.GetOperandsCount()); |
| 1870 | } else { | 724 | ASSERT(meta); |
| 1871 | if (instr.is_b_gpr) { | ||
| 1872 | op_b = regs.GetRegisterAsFloat(instr.gpr20); | ||
| 1873 | } else { | ||
| 1874 | op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 1875 | GLSLRegister::Type::Float); | ||
| 1876 | } | ||
| 1877 | } | ||
| 1878 | 725 | ||
| 1879 | switch (opcode->get().GetId()) { | 726 | std::string expr = func; |
| 1880 | case OpCode::Id::MOV_C: | 727 | expr += '('; |
| 1881 | case OpCode::Id::MOV_R: { | 728 | expr += GetSampler(meta->sampler); |
| 1882 | // MOV does not have neither 'abs' nor 'neg' bits. | 729 | expr += ", "; |
| 1883 | regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1); | ||
| 1884 | break; | ||
| 1885 | } | ||
| 1886 | 730 | ||
| 1887 | case OpCode::Id::FMUL_C: | 731 | expr += coord_constructors[meta->coords_count - 1]; |
| 1888 | case OpCode::Id::FMUL_R: | 732 | expr += '('; |
| 1889 | case OpCode::Id::FMUL_IMM: { | 733 | for (u32 i = 0; i < count; ++i) { |
| 1890 | // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. | 734 | const bool is_extra = i >= meta->coords_count; |
| 1891 | UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, | 735 | const bool is_array = i == meta->array_index; |
| 1892 | "FMUL tab5cb8_2({}) is not implemented", | ||
| 1893 | instr.fmul.tab5cb8_2.Value()); | ||
| 1894 | UNIMPLEMENTED_IF_MSG( | ||
| 1895 | instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", | ||
| 1896 | instr.fmul.tab5c68_0 | ||
| 1897 | .Value()); // SMO typical sends 1 here which seems to be the default | ||
| 1898 | |||
| 1899 | op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b); | ||
| 1900 | |||
| 1901 | std::string postfactor_op; | ||
| 1902 | if (instr.fmul.postfactor != 0) { | ||
| 1903 | s8 postfactor = static_cast<s8>(instr.fmul.postfactor); | ||
| 1904 | |||
| 1905 | // postfactor encoded as 3-bit 1's complement in instruction, | ||
| 1906 | // interpreted with below logic. | ||
| 1907 | if (postfactor >= 4) { | ||
| 1908 | postfactor = 7 - postfactor; | ||
| 1909 | } else { | ||
| 1910 | postfactor = 0 - postfactor; | ||
| 1911 | } | ||
| 1912 | 736 | ||
| 1913 | if (postfactor > 0) { | 737 | std::string operand = [&]() { |
| 1914 | postfactor_op = " * " + std::to_string(1 << postfactor); | 738 | if (is_extra && is_extra_int) { |
| 739 | if (const auto immediate = std::get_if<ImmediateNode>(operation[i])) { | ||
| 740 | return std::to_string(static_cast<s32>(immediate->GetValue())); | ||
| 1915 | } else { | 741 | } else { |
| 1916 | postfactor_op = " / " + std::to_string(1 << -postfactor); | 742 | return "ftoi(" + Visit(operation[i]) + ')'; |
| 1917 | } | 743 | } |
| 1918 | } | ||
| 1919 | |||
| 1920 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + postfactor_op, 1, 1, | ||
| 1921 | instr.alu.saturate_d, instr.generates_cc, 0, true); | ||
| 1922 | break; | ||
| 1923 | } | ||
| 1924 | case OpCode::Id::FADD_C: | ||
| 1925 | case OpCode::Id::FADD_R: | ||
| 1926 | case OpCode::Id::FADD_IMM: { | ||
| 1927 | op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 1928 | op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 1929 | |||
| 1930 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, | ||
| 1931 | instr.alu.saturate_d, instr.generates_cc, 0, true); | ||
| 1932 | break; | ||
| 1933 | } | ||
| 1934 | case OpCode::Id::MUFU: { | ||
| 1935 | op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 1936 | switch (instr.sub_op) { | ||
| 1937 | case SubOp::Cos: | ||
| 1938 | regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, | ||
| 1939 | instr.alu.saturate_d, false, 0, true); | ||
| 1940 | break; | ||
| 1941 | case SubOp::Sin: | ||
| 1942 | regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1, | ||
| 1943 | instr.alu.saturate_d, false, 0, true); | ||
| 1944 | break; | ||
| 1945 | case SubOp::Ex2: | ||
| 1946 | regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1, | ||
| 1947 | instr.alu.saturate_d, false, 0, true); | ||
| 1948 | break; | ||
| 1949 | case SubOp::Lg2: | ||
| 1950 | regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1, | ||
| 1951 | instr.alu.saturate_d, false, 0, true); | ||
| 1952 | break; | ||
| 1953 | case SubOp::Rcp: | ||
| 1954 | regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, | ||
| 1955 | instr.alu.saturate_d, false, 0, true); | ||
| 1956 | break; | ||
| 1957 | case SubOp::Rsq: | ||
| 1958 | regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1, | ||
| 1959 | instr.alu.saturate_d, false, 0, true); | ||
| 1960 | break; | ||
| 1961 | case SubOp::Sqrt: | ||
| 1962 | regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1, | ||
| 1963 | instr.alu.saturate_d, false, 0, true); | ||
| 1964 | break; | ||
| 1965 | default: | ||
| 1966 | UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", | ||
| 1967 | static_cast<unsigned>(instr.sub_op.Value())); | ||
| 1968 | } | ||
| 1969 | break; | ||
| 1970 | } | ||
| 1971 | case OpCode::Id::FMNMX_C: | ||
| 1972 | case OpCode::Id::FMNMX_R: | ||
| 1973 | case OpCode::Id::FMNMX_IMM: { | ||
| 1974 | UNIMPLEMENTED_IF_MSG( | ||
| 1975 | instr.generates_cc, | ||
| 1976 | "Condition codes generation in FMNMX is partially implemented"); | ||
| 1977 | |||
| 1978 | op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 1979 | op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 1980 | |||
| 1981 | std::string condition = | ||
| 1982 | GetPredicateCondition(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0); | ||
| 1983 | std::string parameters = op_a + ',' + op_b; | ||
| 1984 | regs.SetRegisterToFloat(instr.gpr0, 0, | ||
| 1985 | '(' + condition + ") ? min(" + parameters + ") : max(" + | ||
| 1986 | parameters + ')', | ||
| 1987 | 1, 1, false, instr.generates_cc, 0, true); | ||
| 1988 | break; | ||
| 1989 | } | ||
| 1990 | case OpCode::Id::RRO_C: | ||
| 1991 | case OpCode::Id::RRO_R: | ||
| 1992 | case OpCode::Id::RRO_IMM: { | ||
| 1993 | // Currently RRO is only implemented as a register move. | ||
| 1994 | op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 1995 | regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1); | ||
| 1996 | LOG_WARNING(HW_GPU, "RRO instruction is incomplete"); | ||
| 1997 | break; | ||
| 1998 | } | ||
| 1999 | default: { | ||
| 2000 | UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); | ||
| 2001 | } | ||
| 2002 | } | ||
| 2003 | break; | ||
| 2004 | } | ||
| 2005 | case OpCode::Type::ArithmeticImmediate: { | ||
| 2006 | switch (opcode->get().GetId()) { | ||
| 2007 | case OpCode::Id::MOV32_IMM: { | ||
| 2008 | regs.SetRegisterToFloat(instr.gpr0, 0, GetImmediate32(instr), 1, 1); | ||
| 2009 | break; | ||
| 2010 | } | ||
| 2011 | case OpCode::Id::FMUL32_IMM: { | ||
| 2012 | regs.SetRegisterToFloat( | ||
| 2013 | instr.gpr0, 0, | ||
| 2014 | regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1, | ||
| 2015 | instr.fmul32.saturate, instr.op_32.generates_cc, 0, true); | ||
| 2016 | break; | ||
| 2017 | } | ||
| 2018 | case OpCode::Id::FADD32I: { | ||
| 2019 | UNIMPLEMENTED_IF_MSG( | ||
| 2020 | instr.op_32.generates_cc, | ||
| 2021 | "Condition codes generation in FADD32I is partially implemented"); | ||
| 2022 | |||
| 2023 | std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); | ||
| 2024 | std::string op_b = GetImmediate32(instr); | ||
| 2025 | |||
| 2026 | if (instr.fadd32i.abs_a) { | ||
| 2027 | op_a = "abs(" + op_a + ')'; | ||
| 2028 | } | ||
| 2029 | |||
| 2030 | if (instr.fadd32i.negate_a) { | ||
| 2031 | op_a = "-(" + op_a + ')'; | ||
| 2032 | } | ||
| 2033 | |||
| 2034 | if (instr.fadd32i.abs_b) { | ||
| 2035 | op_b = "abs(" + op_b + ')'; | ||
| 2036 | } | ||
| 2037 | |||
| 2038 | if (instr.fadd32i.negate_b) { | ||
| 2039 | op_b = "-(" + op_b + ')'; | ||
| 2040 | } | ||
| 2041 | |||
| 2042 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false, | ||
| 2043 | instr.op_32.generates_cc, 0, true); | ||
| 2044 | break; | ||
| 2045 | } | ||
| 2046 | } | ||
| 2047 | break; | ||
| 2048 | } | ||
| 2049 | case OpCode::Type::Bfe: { | ||
| 2050 | UNIMPLEMENTED_IF(instr.bfe.negate_b); | ||
| 2051 | |||
| 2052 | std::string op_a = instr.bfe.negate_a ? "-" : ""; | ||
| 2053 | op_a += regs.GetRegisterAsInteger(instr.gpr8); | ||
| 2054 | |||
| 2055 | switch (opcode->get().GetId()) { | ||
| 2056 | case OpCode::Id::BFE_IMM: { | ||
| 2057 | std::string inner_shift = | ||
| 2058 | '(' + op_a + " << " + std::to_string(instr.bfe.GetLeftShiftValue()) + ')'; | ||
| 2059 | std::string outer_shift = | ||
| 2060 | '(' + inner_shift + " >> " + | ||
| 2061 | std::to_string(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position) + ')'; | ||
| 2062 | |||
| 2063 | regs.SetRegisterToInteger(instr.gpr0, true, 0, outer_shift, 1, 1, false, | ||
| 2064 | instr.generates_cc); | ||
| 2065 | break; | ||
| 2066 | } | ||
| 2067 | default: { | ||
| 2068 | UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName()); | ||
| 2069 | } | ||
| 2070 | } | ||
| 2071 | |||
| 2072 | break; | ||
| 2073 | } | ||
| 2074 | case OpCode::Type::Bfi: { | ||
| 2075 | const auto [base, packed_shift] = [&]() -> std::tuple<std::string, std::string> { | ||
| 2076 | switch (opcode->get().GetId()) { | ||
| 2077 | case OpCode::Id::BFI_IMM_R: | ||
| 2078 | return {regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 2079 | std::to_string(instr.alu.GetSignedImm20_20())}; | ||
| 2080 | default: | ||
| 2081 | UNREACHABLE(); | ||
| 2082 | return {regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 2083 | std::to_string(instr.alu.GetSignedImm20_20())}; | ||
| 2084 | } | ||
| 2085 | }(); | ||
| 2086 | const std::string offset = '(' + packed_shift + " & 0xff)"; | ||
| 2087 | const std::string bits = "((" + packed_shift + " >> 8) & 0xff)"; | ||
| 2088 | const std::string insert = regs.GetRegisterAsInteger(instr.gpr8, 0, false); | ||
| 2089 | regs.SetRegisterToInteger(instr.gpr0, false, 0, | ||
| 2090 | "bitfieldInsert(" + base + ", " + insert + ", " + offset + | ||
| 2091 | ", " + bits + ')', | ||
| 2092 | 1, 1, false, instr.generates_cc); | ||
| 2093 | break; | ||
| 2094 | } | ||
| 2095 | case OpCode::Type::Shift: { | ||
| 2096 | std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true); | ||
| 2097 | std::string op_b; | ||
| 2098 | |||
| 2099 | if (instr.is_b_imm) { | ||
| 2100 | op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')'; | ||
| 2101 | } else { | ||
| 2102 | if (instr.is_b_gpr) { | ||
| 2103 | op_b += regs.GetRegisterAsInteger(instr.gpr20); | ||
| 2104 | } else { | 744 | } else { |
| 2105 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | 745 | return Visit(operation[i]); |
| 2106 | GLSLRegister::Type::Integer); | ||
| 2107 | } | 746 | } |
| 747 | }(); | ||
| 748 | if (is_array) { | ||
| 749 | ASSERT(!is_extra); | ||
| 750 | operand = "float(ftoi(" + operand + "))"; | ||
| 2108 | } | 751 | } |
| 2109 | 752 | ||
| 2110 | switch (opcode->get().GetId()) { | 753 | expr += operand; |
| 2111 | case OpCode::Id::SHR_C: | ||
| 2112 | case OpCode::Id::SHR_R: | ||
| 2113 | case OpCode::Id::SHR_IMM: { | ||
| 2114 | if (!instr.shift.is_signed) { | ||
| 2115 | // Logical shift right | ||
| 2116 | op_a = "uint(" + op_a + ')'; | ||
| 2117 | } | ||
| 2118 | 754 | ||
| 2119 | // Cast to int is superfluous for arithmetic shift, it's only for a logical shift | 755 | if (i + 1 == meta->coords_count) { |
| 2120 | regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(" + op_a + " >> " + op_b + ')', | 756 | expr += ')'; |
| 2121 | 1, 1, false, instr.generates_cc); | ||
| 2122 | break; | ||
| 2123 | } | ||
| 2124 | case OpCode::Id::SHL_C: | ||
| 2125 | case OpCode::Id::SHL_R: | ||
| 2126 | case OpCode::Id::SHL_IMM: | ||
| 2127 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 2128 | "Condition codes generation in SHL is not implemented"); | ||
| 2129 | regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1, false, | ||
| 2130 | instr.generates_cc); | ||
| 2131 | break; | ||
| 2132 | default: { | ||
| 2133 | UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); | ||
| 2134 | } | 757 | } |
| 758 | if (i + 1 < count) { | ||
| 759 | expr += ", "; | ||
| 2135 | } | 760 | } |
| 2136 | break; | ||
| 2137 | } | 761 | } |
| 2138 | case OpCode::Type::ArithmeticIntegerImmediate: { | 762 | expr += ')'; |
| 2139 | std::string op_a = regs.GetRegisterAsInteger(instr.gpr8); | 763 | return expr; |
| 2140 | std::string op_b = std::to_string(instr.alu.imm20_32.Value()); | 764 | } |
| 2141 | |||
| 2142 | switch (opcode->get().GetId()) { | ||
| 2143 | case OpCode::Id::IADD32I: | ||
| 2144 | UNIMPLEMENTED_IF_MSG( | ||
| 2145 | instr.op_32.generates_cc, | ||
| 2146 | "Condition codes generation in IADD32I is partially implemented"); | ||
| 2147 | |||
| 2148 | if (instr.iadd32i.negate_a) | ||
| 2149 | op_a = "-(" + op_a + ')'; | ||
| 2150 | |||
| 2151 | regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1, | ||
| 2152 | instr.iadd32i.saturate, instr.op_32.generates_cc); | ||
| 2153 | break; | ||
| 2154 | case OpCode::Id::LOP32I: { | ||
| 2155 | |||
| 2156 | if (instr.alu.lop32i.invert_a) | ||
| 2157 | op_a = "~(" + op_a + ')'; | ||
| 2158 | |||
| 2159 | if (instr.alu.lop32i.invert_b) | ||
| 2160 | op_b = "~(" + op_b + ')'; | ||
| 2161 | |||
| 2162 | WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b, | ||
| 2163 | Tegra::Shader::PredicateResultMode::None, | ||
| 2164 | Tegra::Shader::Pred::UnusedIndex, instr.op_32.generates_cc); | ||
| 2165 | break; | ||
| 2166 | } | ||
| 2167 | default: { | ||
| 2168 | UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}", | ||
| 2169 | opcode->get().GetName()); | ||
| 2170 | } | ||
| 2171 | } | ||
| 2172 | break; | ||
| 2173 | } | ||
| 2174 | case OpCode::Type::ArithmeticInteger: { | ||
| 2175 | std::string op_a = regs.GetRegisterAsInteger(instr.gpr8); | ||
| 2176 | std::string op_b; | ||
| 2177 | if (instr.is_b_imm) { | ||
| 2178 | op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')'; | ||
| 2179 | } else { | ||
| 2180 | if (instr.is_b_gpr) { | ||
| 2181 | op_b += regs.GetRegisterAsInteger(instr.gpr20); | ||
| 2182 | } else { | ||
| 2183 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2184 | GLSLRegister::Type::Integer); | ||
| 2185 | } | ||
| 2186 | } | ||
| 2187 | |||
| 2188 | switch (opcode->get().GetId()) { | ||
| 2189 | case OpCode::Id::IADD_C: | ||
| 2190 | case OpCode::Id::IADD_R: | ||
| 2191 | case OpCode::Id::IADD_IMM: { | ||
| 2192 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 2193 | "Condition codes generation in IADD is partially implemented"); | ||
| 2194 | |||
| 2195 | if (instr.alu_integer.negate_a) | ||
| 2196 | op_a = "-(" + op_a + ')'; | ||
| 2197 | |||
| 2198 | if (instr.alu_integer.negate_b) | ||
| 2199 | op_b = "-(" + op_b + ')'; | ||
| 2200 | |||
| 2201 | regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1, | ||
| 2202 | instr.alu.saturate_d, instr.generates_cc); | ||
| 2203 | break; | ||
| 2204 | } | ||
| 2205 | case OpCode::Id::IADD3_C: | ||
| 2206 | case OpCode::Id::IADD3_R: | ||
| 2207 | case OpCode::Id::IADD3_IMM: { | ||
| 2208 | UNIMPLEMENTED_IF_MSG( | ||
| 2209 | instr.generates_cc, | ||
| 2210 | "Condition codes generation in IADD3 is partially implemented"); | ||
| 2211 | |||
| 2212 | std::string op_c = regs.GetRegisterAsInteger(instr.gpr39); | ||
| 2213 | |||
| 2214 | auto apply_height = [](auto height, auto& oprand) { | ||
| 2215 | switch (height) { | ||
| 2216 | case Tegra::Shader::IAdd3Height::None: | ||
| 2217 | break; | ||
| 2218 | case Tegra::Shader::IAdd3Height::LowerHalfWord: | ||
| 2219 | oprand = "((" + oprand + ") & 0xFFFF)"; | ||
| 2220 | break; | ||
| 2221 | case Tegra::Shader::IAdd3Height::UpperHalfWord: | ||
| 2222 | oprand = "((" + oprand + ") >> 16)"; | ||
| 2223 | break; | ||
| 2224 | default: | ||
| 2225 | UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", | ||
| 2226 | static_cast<u32>(height.Value())); | ||
| 2227 | } | ||
| 2228 | }; | ||
| 2229 | |||
| 2230 | if (opcode->get().GetId() == OpCode::Id::IADD3_R) { | ||
| 2231 | apply_height(instr.iadd3.height_a, op_a); | ||
| 2232 | apply_height(instr.iadd3.height_b, op_b); | ||
| 2233 | apply_height(instr.iadd3.height_c, op_c); | ||
| 2234 | } | ||
| 2235 | |||
| 2236 | if (instr.iadd3.neg_a) | ||
| 2237 | op_a = "-(" + op_a + ')'; | ||
| 2238 | |||
| 2239 | if (instr.iadd3.neg_b) | ||
| 2240 | op_b = "-(" + op_b + ')'; | ||
| 2241 | |||
| 2242 | if (instr.iadd3.neg_c) | ||
| 2243 | op_c = "-(" + op_c + ')'; | ||
| 2244 | |||
| 2245 | std::string result; | ||
| 2246 | if (opcode->get().GetId() == OpCode::Id::IADD3_R) { | ||
| 2247 | switch (instr.iadd3.mode) { | ||
| 2248 | case Tegra::Shader::IAdd3Mode::RightShift: | ||
| 2249 | // TODO(tech4me): According to | ||
| 2250 | // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3 | ||
| 2251 | // The addition between op_a and op_b should be done in uint33, more | ||
| 2252 | // investigation required | ||
| 2253 | result = "(((" + op_a + " + " + op_b + ") >> 16) + " + op_c + ')'; | ||
| 2254 | break; | ||
| 2255 | case Tegra::Shader::IAdd3Mode::LeftShift: | ||
| 2256 | result = "(((" + op_a + " + " + op_b + ") << 16) + " + op_c + ')'; | ||
| 2257 | break; | ||
| 2258 | default: | ||
| 2259 | result = '(' + op_a + " + " + op_b + " + " + op_c + ')'; | ||
| 2260 | break; | ||
| 2261 | } | ||
| 2262 | } else { | ||
| 2263 | result = '(' + op_a + " + " + op_b + " + " + op_c + ')'; | ||
| 2264 | } | ||
| 2265 | |||
| 2266 | regs.SetRegisterToInteger(instr.gpr0, true, 0, result, 1, 1, false, | ||
| 2267 | instr.generates_cc); | ||
| 2268 | break; | ||
| 2269 | } | ||
| 2270 | case OpCode::Id::ISCADD_C: | ||
| 2271 | case OpCode::Id::ISCADD_R: | ||
| 2272 | case OpCode::Id::ISCADD_IMM: { | ||
| 2273 | UNIMPLEMENTED_IF_MSG( | ||
| 2274 | instr.generates_cc, | ||
| 2275 | "Condition codes generation in ISCADD is partially implemented"); | ||
| 2276 | |||
| 2277 | if (instr.alu_integer.negate_a) | ||
| 2278 | op_a = "-(" + op_a + ')'; | ||
| 2279 | |||
| 2280 | if (instr.alu_integer.negate_b) | ||
| 2281 | op_b = "-(" + op_b + ')'; | ||
| 2282 | |||
| 2283 | const std::string shift = std::to_string(instr.alu_integer.shift_amount.Value()); | ||
| 2284 | |||
| 2285 | regs.SetRegisterToInteger(instr.gpr0, true, 0, | ||
| 2286 | "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1, | ||
| 2287 | false, instr.generates_cc); | ||
| 2288 | break; | ||
| 2289 | } | ||
| 2290 | case OpCode::Id::POPC_C: | ||
| 2291 | case OpCode::Id::POPC_R: | ||
| 2292 | case OpCode::Id::POPC_IMM: { | ||
| 2293 | if (instr.popc.invert) { | ||
| 2294 | op_b = "~(" + op_b + ')'; | ||
| 2295 | } | ||
| 2296 | regs.SetRegisterToInteger(instr.gpr0, true, 0, "bitCount(" + op_b + ')', 1, 1); | ||
| 2297 | break; | ||
| 2298 | } | ||
| 2299 | case OpCode::Id::SEL_C: | ||
| 2300 | case OpCode::Id::SEL_R: | ||
| 2301 | case OpCode::Id::SEL_IMM: { | ||
| 2302 | const std::string condition = | ||
| 2303 | GetPredicateCondition(instr.sel.pred, instr.sel.neg_pred != 0); | ||
| 2304 | regs.SetRegisterToInteger(instr.gpr0, true, 0, | ||
| 2305 | '(' + condition + ") ? " + op_a + " : " + op_b, 1, 1); | ||
| 2306 | break; | ||
| 2307 | } | ||
| 2308 | case OpCode::Id::LOP_C: | ||
| 2309 | case OpCode::Id::LOP_R: | ||
| 2310 | case OpCode::Id::LOP_IMM: { | ||
| 2311 | |||
| 2312 | if (instr.alu.lop.invert_a) | ||
| 2313 | op_a = "~(" + op_a + ')'; | ||
| 2314 | |||
| 2315 | if (instr.alu.lop.invert_b) | ||
| 2316 | op_b = "~(" + op_b + ')'; | ||
| 2317 | |||
| 2318 | WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b, | ||
| 2319 | instr.alu.lop.pred_result_mode, instr.alu.lop.pred48, | ||
| 2320 | instr.generates_cc); | ||
| 2321 | break; | ||
| 2322 | } | ||
| 2323 | case OpCode::Id::LOP3_C: | ||
| 2324 | case OpCode::Id::LOP3_R: | ||
| 2325 | case OpCode::Id::LOP3_IMM: { | ||
| 2326 | const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39); | ||
| 2327 | std::string lut; | ||
| 2328 | |||
| 2329 | if (opcode->get().GetId() == OpCode::Id::LOP3_R) { | ||
| 2330 | lut = '(' + std::to_string(instr.alu.lop3.GetImmLut28()) + ')'; | ||
| 2331 | } else { | ||
| 2332 | lut = '(' + std::to_string(instr.alu.lop3.GetImmLut48()) + ')'; | ||
| 2333 | } | ||
| 2334 | |||
| 2335 | WriteLop3Instruction(instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc); | ||
| 2336 | break; | ||
| 2337 | } | ||
| 2338 | case OpCode::Id::IMNMX_C: | ||
| 2339 | case OpCode::Id::IMNMX_R: | ||
| 2340 | case OpCode::Id::IMNMX_IMM: { | ||
| 2341 | UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); | ||
| 2342 | UNIMPLEMENTED_IF_MSG( | ||
| 2343 | instr.generates_cc, | ||
| 2344 | "Condition codes generation in IMNMX is partially implemented"); | ||
| 2345 | |||
| 2346 | const std::string condition = | ||
| 2347 | GetPredicateCondition(instr.imnmx.pred, instr.imnmx.negate_pred != 0); | ||
| 2348 | const std::string parameters = op_a + ',' + op_b; | ||
| 2349 | regs.SetRegisterToInteger(instr.gpr0, instr.imnmx.is_signed, 0, | ||
| 2350 | '(' + condition + ") ? min(" + parameters + ") : max(" + | ||
| 2351 | parameters + ')', | ||
| 2352 | 1, 1, false, instr.generates_cc); | ||
| 2353 | break; | ||
| 2354 | } | ||
| 2355 | case OpCode::Id::LEA_R2: | ||
| 2356 | case OpCode::Id::LEA_R1: | ||
| 2357 | case OpCode::Id::LEA_IMM: | ||
| 2358 | case OpCode::Id::LEA_RZ: | ||
| 2359 | case OpCode::Id::LEA_HI: { | ||
| 2360 | std::string op_c; | ||
| 2361 | |||
| 2362 | switch (opcode->get().GetId()) { | ||
| 2363 | case OpCode::Id::LEA_R2: { | ||
| 2364 | op_a = regs.GetRegisterAsInteger(instr.gpr20); | ||
| 2365 | op_b = regs.GetRegisterAsInteger(instr.gpr39); | ||
| 2366 | op_c = std::to_string(instr.lea.r2.entry_a); | ||
| 2367 | break; | ||
| 2368 | } | ||
| 2369 | |||
| 2370 | case OpCode::Id::LEA_R1: { | ||
| 2371 | const bool neg = instr.lea.r1.neg != 0; | ||
| 2372 | op_a = regs.GetRegisterAsInteger(instr.gpr8); | ||
| 2373 | if (neg) | ||
| 2374 | op_a = "-(" + op_a + ')'; | ||
| 2375 | op_b = regs.GetRegisterAsInteger(instr.gpr20); | ||
| 2376 | op_c = std::to_string(instr.lea.r1.entry_a); | ||
| 2377 | break; | ||
| 2378 | } | ||
| 2379 | |||
| 2380 | case OpCode::Id::LEA_IMM: { | ||
| 2381 | const bool neg = instr.lea.imm.neg != 0; | ||
| 2382 | op_b = regs.GetRegisterAsInteger(instr.gpr8); | ||
| 2383 | if (neg) | ||
| 2384 | op_b = "-(" + op_b + ')'; | ||
| 2385 | op_a = std::to_string(instr.lea.imm.entry_a); | ||
| 2386 | op_c = std::to_string(instr.lea.imm.entry_b); | ||
| 2387 | break; | ||
| 2388 | } | ||
| 2389 | |||
| 2390 | case OpCode::Id::LEA_RZ: { | ||
| 2391 | const bool neg = instr.lea.rz.neg != 0; | ||
| 2392 | op_b = regs.GetRegisterAsInteger(instr.gpr8); | ||
| 2393 | if (neg) | ||
| 2394 | op_b = "-(" + op_b + ')'; | ||
| 2395 | op_a = regs.GetUniform(instr.lea.rz.cb_index, instr.lea.rz.cb_offset, | ||
| 2396 | GLSLRegister::Type::Integer); | ||
| 2397 | op_c = std::to_string(instr.lea.rz.entry_a); | ||
| 2398 | |||
| 2399 | break; | ||
| 2400 | } | ||
| 2401 | 765 | ||
| 2402 | case OpCode::Id::LEA_HI: | 766 | std::string Assign(Operation operation) { |
| 2403 | default: { | 767 | const Node dest = operation[0]; |
| 2404 | op_b = regs.GetRegisterAsInteger(instr.gpr8); | 768 | const Node src = operation[1]; |
| 2405 | op_a = std::to_string(instr.lea.imm.entry_a); | ||
| 2406 | op_c = std::to_string(instr.lea.imm.entry_b); | ||
| 2407 | UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName()); | ||
| 2408 | } | ||
| 2409 | } | ||
| 2410 | UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex), | ||
| 2411 | "Unhandled LEA Predicate"); | ||
| 2412 | const std::string value = '(' + op_a + " + (" + op_b + "*(1 << " + op_c + ")))"; | ||
| 2413 | regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1, false, | ||
| 2414 | instr.generates_cc); | ||
| 2415 | 769 | ||
| 2416 | break; | 770 | std::string target; |
| 2417 | } | 771 | if (const auto gpr = std::get_if<GprNode>(dest)) { |
| 2418 | default: { | 772 | if (gpr->GetIndex() == Register::ZeroIndex) { |
| 2419 | UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", | 773 | // Writing to Register::ZeroIndex is a no op |
| 2420 | opcode->get().GetName()); | 774 | return {}; |
| 2421 | } | ||
| 2422 | } | 775 | } |
| 776 | target = GetRegister(gpr->GetIndex()); | ||
| 2423 | 777 | ||
| 2424 | break; | 778 | } else if (const auto abuf = std::get_if<AbufNode>(dest)) { |
| 2425 | } | 779 | target = [&]() -> std::string { |
| 2426 | case OpCode::Type::ArithmeticHalf: { | 780 | switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) { |
| 2427 | if (opcode->get().GetId() == OpCode::Id::HADD2_C || | 781 | case Attribute::Index::Position: |
| 2428 | opcode->get().GetId() == OpCode::Id::HADD2_R) { | 782 | return "position" + GetSwizzle(abuf->GetElement()); |
| 2429 | UNIMPLEMENTED_IF(instr.alu_half.ftz != 0); | 783 | case Attribute::Index::PointSize: |
| 2430 | } | 784 | return "gl_PointSize"; |
| 2431 | const bool negate_a = | 785 | case Attribute::Index::ClipDistances0123: |
| 2432 | opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; | 786 | return "gl_ClipDistance[" + std::to_string(abuf->GetElement()) + ']'; |
| 2433 | const bool negate_b = | 787 | case Attribute::Index::ClipDistances4567: |
| 2434 | opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0; | 788 | return "gl_ClipDistance[" + std::to_string(abuf->GetElement() + 4) + ']'; |
| 2435 | |||
| 2436 | const std::string op_a = | ||
| 2437 | GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.alu_half.type_a, | ||
| 2438 | instr.alu_half.abs_a != 0, negate_a); | ||
| 2439 | |||
| 2440 | std::string op_b; | ||
| 2441 | switch (opcode->get().GetId()) { | ||
| 2442 | case OpCode::Id::HADD2_C: | ||
| 2443 | case OpCode::Id::HMUL2_C: | ||
| 2444 | op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2445 | GLSLRegister::Type::UnsignedInteger); | ||
| 2446 | break; | ||
| 2447 | case OpCode::Id::HADD2_R: | ||
| 2448 | case OpCode::Id::HMUL2_R: | ||
| 2449 | op_b = regs.GetRegisterAsInteger(instr.gpr20, 0, false); | ||
| 2450 | break; | ||
| 2451 | default: | ||
| 2452 | UNREACHABLE(); | ||
| 2453 | op_b = "0"; | ||
| 2454 | break; | ||
| 2455 | } | ||
| 2456 | op_b = GetHalfFloat(op_b, instr.alu_half.type_b, instr.alu_half.abs_b != 0, negate_b); | ||
| 2457 | |||
| 2458 | const std::string result = [&]() { | ||
| 2459 | switch (opcode->get().GetId()) { | ||
| 2460 | case OpCode::Id::HADD2_C: | ||
| 2461 | case OpCode::Id::HADD2_R: | ||
| 2462 | return '(' + op_a + " + " + op_b + ')'; | ||
| 2463 | case OpCode::Id::HMUL2_C: | ||
| 2464 | case OpCode::Id::HMUL2_R: | ||
| 2465 | return '(' + op_a + " * " + op_b + ')'; | ||
| 2466 | default: | ||
| 2467 | UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", | ||
| 2468 | opcode->get().GetName()); | ||
| 2469 | return std::string("0"); | ||
| 2470 | } | ||
| 2471 | }(); | ||
| 2472 | |||
| 2473 | regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.alu_half.merge, 1, 1, | ||
| 2474 | instr.alu_half.saturate != 0); | ||
| 2475 | break; | ||
| 2476 | } | ||
| 2477 | case OpCode::Type::ArithmeticHalfImmediate: { | ||
| 2478 | if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { | ||
| 2479 | UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0); | ||
| 2480 | } else { | ||
| 2481 | UNIMPLEMENTED_IF(instr.alu_half_imm.precision != | ||
| 2482 | Tegra::Shader::HalfPrecision::None); | ||
| 2483 | } | ||
| 2484 | |||
| 2485 | const std::string op_a = GetHalfFloat( | ||
| 2486 | regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.alu_half_imm.type_a, | ||
| 2487 | instr.alu_half_imm.abs_a != 0, instr.alu_half_imm.negate_a != 0); | ||
| 2488 | |||
| 2489 | const std::string op_b = UnpackHalfImmediate(instr, true); | ||
| 2490 | |||
| 2491 | const std::string result = [&]() { | ||
| 2492 | switch (opcode->get().GetId()) { | ||
| 2493 | case OpCode::Id::HADD2_IMM: | ||
| 2494 | return op_a + " + " + op_b; | ||
| 2495 | case OpCode::Id::HMUL2_IMM: | ||
| 2496 | return op_a + " * " + op_b; | ||
| 2497 | default: | 789 | default: |
| 2498 | UNREACHABLE(); | 790 | if (attribute >= Attribute::Index::Attribute_0 && |
| 2499 | return std::string("0"); | 791 | attribute <= Attribute::Index::Attribute_31) { |
| 792 | return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()); | ||
| 793 | } | ||
| 794 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", | ||
| 795 | static_cast<u32>(attribute)); | ||
| 796 | return "0"; | ||
| 2500 | } | 797 | } |
| 2501 | }(); | 798 | }(); |
| 2502 | 799 | ||
| 2503 | regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.alu_half_imm.merge, 1, 1, | 800 | } else if (const auto lmem = std::get_if<LmemNode>(dest)) { |
| 2504 | instr.alu_half_imm.saturate != 0); | 801 | target = GetLocalMemory() + "[ftou(" + Visit(lmem->GetAddress()) + ") / 4]"; |
| 2505 | break; | ||
| 2506 | } | ||
| 2507 | case OpCode::Type::Ffma: { | ||
| 2508 | const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); | ||
| 2509 | std::string op_b = instr.ffma.negate_b ? "-" : ""; | ||
| 2510 | std::string op_c = instr.ffma.negate_c ? "-" : ""; | ||
| 2511 | |||
| 2512 | UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); | ||
| 2513 | UNIMPLEMENTED_IF_MSG( | ||
| 2514 | instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented", | ||
| 2515 | instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO | ||
| 2516 | UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", | ||
| 2517 | instr.ffma.tab5980_1.Value()); | ||
| 2518 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 2519 | "Condition codes generation in FFMA is partially implemented"); | ||
| 2520 | |||
| 2521 | switch (opcode->get().GetId()) { | ||
| 2522 | case OpCode::Id::FFMA_CR: { | ||
| 2523 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2524 | GLSLRegister::Type::Float); | ||
| 2525 | op_c += regs.GetRegisterAsFloat(instr.gpr39); | ||
| 2526 | break; | ||
| 2527 | } | ||
| 2528 | case OpCode::Id::FFMA_RR: { | ||
| 2529 | op_b += regs.GetRegisterAsFloat(instr.gpr20); | ||
| 2530 | op_c += regs.GetRegisterAsFloat(instr.gpr39); | ||
| 2531 | break; | ||
| 2532 | } | ||
| 2533 | case OpCode::Id::FFMA_RC: { | ||
| 2534 | op_b += regs.GetRegisterAsFloat(instr.gpr39); | ||
| 2535 | op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2536 | GLSLRegister::Type::Float); | ||
| 2537 | break; | ||
| 2538 | } | ||
| 2539 | case OpCode::Id::FFMA_IMM: { | ||
| 2540 | op_b += GetImmediate19(instr); | ||
| 2541 | op_c += regs.GetRegisterAsFloat(instr.gpr39); | ||
| 2542 | break; | ||
| 2543 | } | ||
| 2544 | default: { | ||
| 2545 | UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName()); | ||
| 2546 | } | ||
| 2547 | } | ||
| 2548 | 802 | ||
| 2549 | regs.SetRegisterToFloat(instr.gpr0, 0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')', | 803 | } else { |
| 2550 | 1, 1, instr.alu.saturate_d, instr.generates_cc, 0, true); | 804 | UNREACHABLE_MSG("Assign called without a proper target"); |
| 2551 | break; | ||
| 2552 | } | 805 | } |
| 2553 | case OpCode::Type::Hfma2: { | ||
| 2554 | if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { | ||
| 2555 | UNIMPLEMENTED_IF(instr.hfma2.rr.precision != Tegra::Shader::HalfPrecision::None); | ||
| 2556 | } else { | ||
| 2557 | UNIMPLEMENTED_IF(instr.hfma2.precision != Tegra::Shader::HalfPrecision::None); | ||
| 2558 | } | ||
| 2559 | const bool saturate = opcode->get().GetId() == OpCode::Id::HFMA2_RR | ||
| 2560 | ? instr.hfma2.rr.saturate != 0 | ||
| 2561 | : instr.hfma2.saturate != 0; | ||
| 2562 | |||
| 2563 | const std::string op_a = | ||
| 2564 | GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hfma2.type_a); | ||
| 2565 | std::string op_b, op_c; | ||
| 2566 | |||
| 2567 | switch (opcode->get().GetId()) { | ||
| 2568 | case OpCode::Id::HFMA2_CR: | ||
| 2569 | op_b = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2570 | GLSLRegister::Type::UnsignedInteger), | ||
| 2571 | instr.hfma2.type_b, false, instr.hfma2.negate_b); | ||
| 2572 | op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 2573 | instr.hfma2.type_reg39, false, instr.hfma2.negate_c); | ||
| 2574 | break; | ||
| 2575 | case OpCode::Id::HFMA2_RC: | ||
| 2576 | op_b = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 2577 | instr.hfma2.type_reg39, false, instr.hfma2.negate_b); | ||
| 2578 | op_c = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2579 | GLSLRegister::Type::UnsignedInteger), | ||
| 2580 | instr.hfma2.type_b, false, instr.hfma2.negate_c); | ||
| 2581 | break; | ||
| 2582 | case OpCode::Id::HFMA2_RR: | ||
| 2583 | op_b = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false), | ||
| 2584 | instr.hfma2.type_b, false, instr.hfma2.negate_b); | ||
| 2585 | op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 2586 | instr.hfma2.rr.type_c, false, instr.hfma2.rr.negate_c); | ||
| 2587 | break; | ||
| 2588 | case OpCode::Id::HFMA2_IMM_R: | ||
| 2589 | op_b = UnpackHalfImmediate(instr, true); | ||
| 2590 | op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), | ||
| 2591 | instr.hfma2.type_reg39, false, instr.hfma2.negate_c); | ||
| 2592 | break; | ||
| 2593 | default: | ||
| 2594 | UNREACHABLE(); | ||
| 2595 | op_c = op_b = "vec2(0)"; | ||
| 2596 | break; | ||
| 2597 | } | ||
| 2598 | 806 | ||
| 2599 | const std::string result = '(' + op_a + " * " + op_b + " + " + op_c + ')'; | 807 | code.AddLine(target + " = " + Visit(src) + ';'); |
| 808 | return {}; | ||
| 809 | } | ||
| 2600 | 810 | ||
| 2601 | regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.hfma2.merge, 1, 1, saturate); | 811 | std::string Composite(Operation operation) { |
| 2602 | break; | 812 | std::string value = "vec4("; |
| 813 | for (std::size_t i = 0; i < 4; ++i) { | ||
| 814 | value += Visit(operation[i]); | ||
| 815 | if (i < 3) | ||
| 816 | value += ", "; | ||
| 2603 | } | 817 | } |
| 2604 | case OpCode::Type::Conversion: { | 818 | value += ')'; |
| 2605 | switch (opcode->get().GetId()) { | 819 | return value; |
| 2606 | case OpCode::Id::I2I_R: { | 820 | } |
| 2607 | UNIMPLEMENTED_IF(instr.conversion.selector); | ||
| 2608 | |||
| 2609 | std::string op_a = regs.GetRegisterAsInteger( | ||
| 2610 | instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size); | ||
| 2611 | 821 | ||
| 2612 | if (instr.conversion.abs_a) { | 822 | template <Type type> |
| 2613 | op_a = "abs(" + op_a + ')'; | 823 | std::string Add(Operation operation) { |
| 2614 | } | 824 | return GenerateBinaryInfix(operation, "+", type, type, type); |
| 825 | } | ||
| 2615 | 826 | ||
| 2616 | if (instr.conversion.negate_a) { | 827 | template <Type type> |
| 2617 | op_a = "-(" + op_a + ')'; | 828 | std::string Mul(Operation operation) { |
| 2618 | } | 829 | return GenerateBinaryInfix(operation, "*", type, type, type); |
| 830 | } | ||
| 2619 | 831 | ||
| 2620 | regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, | 832 | template <Type type> |
| 2621 | 1, instr.alu.saturate_d, instr.generates_cc, 0, | 833 | std::string Div(Operation operation) { |
| 2622 | instr.conversion.dest_size); | 834 | return GenerateBinaryInfix(operation, "/", type, type, type); |
| 2623 | break; | 835 | } |
| 2624 | } | ||
| 2625 | case OpCode::Id::I2F_R: | ||
| 2626 | case OpCode::Id::I2F_C: { | ||
| 2627 | UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); | ||
| 2628 | UNIMPLEMENTED_IF(instr.conversion.selector); | ||
| 2629 | std::string op_a; | ||
| 2630 | |||
| 2631 | if (instr.is_b_gpr) { | ||
| 2632 | op_a = | ||
| 2633 | regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed, | ||
| 2634 | instr.conversion.src_size); | ||
| 2635 | } else { | ||
| 2636 | op_a = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2637 | instr.conversion.is_input_signed | ||
| 2638 | ? GLSLRegister::Type::Integer | ||
| 2639 | : GLSLRegister::Type::UnsignedInteger, | ||
| 2640 | instr.conversion.src_size); | ||
| 2641 | } | ||
| 2642 | 836 | ||
| 2643 | if (instr.conversion.abs_a) { | 837 | template <Type type> |
| 2644 | op_a = "abs(" + op_a + ')'; | 838 | std::string Fma(Operation operation) { |
| 2645 | } | 839 | return GenerateTernary(operation, "fma", type, type, type, type); |
| 840 | } | ||
| 2646 | 841 | ||
| 2647 | if (instr.conversion.negate_a) { | 842 | template <Type type> |
| 2648 | op_a = "-(" + op_a + ')'; | 843 | std::string Negate(Operation operation) { |
| 2649 | } | 844 | return GenerateUnary(operation, "-", type, type, true); |
| 845 | } | ||
| 2650 | 846 | ||
| 2651 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, false, instr.generates_cc); | 847 | template <Type type> |
| 2652 | break; | 848 | std::string Absolute(Operation operation) { |
| 2653 | } | 849 | return GenerateUnary(operation, "abs", type, type, false); |
| 2654 | case OpCode::Id::F2F_R: { | 850 | } |
| 2655 | UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); | ||
| 2656 | UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); | ||
| 2657 | std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); | ||
| 2658 | 851 | ||
| 2659 | if (instr.conversion.abs_a) { | 852 | std::string FClamp(Operation operation) { |
| 2660 | op_a = "abs(" + op_a + ')'; | 853 | return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float, |
| 2661 | } | 854 | Type::Float); |
| 855 | } | ||
| 2662 | 856 | ||
| 2663 | if (instr.conversion.negate_a) { | 857 | template <Type type> |
| 2664 | op_a = "-(" + op_a + ')'; | 858 | std::string Min(Operation operation) { |
| 2665 | } | 859 | return GenerateBinaryCall(operation, "min", type, type, type); |
| 860 | } | ||
| 2666 | 861 | ||
| 2667 | switch (instr.conversion.f2f.rounding) { | 862 | template <Type type> |
| 2668 | case Tegra::Shader::F2fRoundingOp::None: | 863 | std::string Max(Operation operation) { |
| 2669 | break; | 864 | return GenerateBinaryCall(operation, "max", type, type, type); |
| 2670 | case Tegra::Shader::F2fRoundingOp::Round: | 865 | } |
| 2671 | op_a = "roundEven(" + op_a + ')'; | ||
| 2672 | break; | ||
| 2673 | case Tegra::Shader::F2fRoundingOp::Floor: | ||
| 2674 | op_a = "floor(" + op_a + ')'; | ||
| 2675 | break; | ||
| 2676 | case Tegra::Shader::F2fRoundingOp::Ceil: | ||
| 2677 | op_a = "ceil(" + op_a + ')'; | ||
| 2678 | break; | ||
| 2679 | case Tegra::Shader::F2fRoundingOp::Trunc: | ||
| 2680 | op_a = "trunc(" + op_a + ')'; | ||
| 2681 | break; | ||
| 2682 | default: | ||
| 2683 | UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", | ||
| 2684 | static_cast<u32>(instr.conversion.f2f.rounding.Value())); | ||
| 2685 | break; | ||
| 2686 | } | ||
| 2687 | 866 | ||
| 2688 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d, | 867 | std::string Select(Operation operation) { |
| 2689 | instr.generates_cc); | 868 | const std::string condition = Visit(operation[0]); |
| 2690 | break; | 869 | const std::string true_case = Visit(operation[1]); |
| 2691 | } | 870 | const std::string false_case = Visit(operation[2]); |
| 2692 | case OpCode::Id::F2I_R: | 871 | return ApplyPrecise(operation, |
| 2693 | case OpCode::Id::F2I_C: { | 872 | '(' + condition + " ? " + true_case + " : " + false_case + ')'); |
| 2694 | UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); | 873 | } |
| 2695 | std::string op_a{}; | ||
| 2696 | 874 | ||
| 2697 | if (instr.is_b_gpr) { | 875 | std::string FCos(Operation operation) { |
| 2698 | op_a = regs.GetRegisterAsFloat(instr.gpr20); | 876 | return GenerateUnary(operation, "cos", Type::Float, Type::Float, false); |
| 2699 | } else { | 877 | } |
| 2700 | op_a = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 2701 | GLSLRegister::Type::Float); | ||
| 2702 | } | ||
| 2703 | 878 | ||
| 2704 | if (instr.conversion.abs_a) { | 879 | std::string FSin(Operation operation) { |
| 2705 | op_a = "abs(" + op_a + ')'; | 880 | return GenerateUnary(operation, "sin", Type::Float, Type::Float, false); |
| 2706 | } | 881 | } |
| 2707 | 882 | ||
| 2708 | if (instr.conversion.negate_a) { | 883 | std::string FExp2(Operation operation) { |
| 2709 | op_a = "-(" + op_a + ')'; | 884 | return GenerateUnary(operation, "exp2", Type::Float, Type::Float, false); |
| 2710 | } | 885 | } |
| 2711 | 886 | ||
| 2712 | switch (instr.conversion.f2i.rounding) { | 887 | std::string FLog2(Operation operation) { |
| 2713 | case Tegra::Shader::F2iRoundingOp::None: | 888 | return GenerateUnary(operation, "log2", Type::Float, Type::Float, false); |
| 2714 | break; | 889 | } |
| 2715 | case Tegra::Shader::F2iRoundingOp::Floor: | ||
| 2716 | op_a = "floor(" + op_a + ')'; | ||
| 2717 | break; | ||
| 2718 | case Tegra::Shader::F2iRoundingOp::Ceil: | ||
| 2719 | op_a = "ceil(" + op_a + ')'; | ||
| 2720 | break; | ||
| 2721 | case Tegra::Shader::F2iRoundingOp::Trunc: | ||
| 2722 | op_a = "trunc(" + op_a + ')'; | ||
| 2723 | break; | ||
| 2724 | default: | ||
| 2725 | UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}", | ||
| 2726 | static_cast<u32>(instr.conversion.f2i.rounding.Value())); | ||
| 2727 | break; | ||
| 2728 | } | ||
| 2729 | 890 | ||
| 2730 | if (instr.conversion.is_output_signed) { | 891 | std::string FInverseSqrt(Operation operation) { |
| 2731 | op_a = "int(" + op_a + ')'; | 892 | return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float, false); |
| 2732 | } else { | 893 | } |
| 2733 | op_a = "uint(" + op_a + ')'; | ||
| 2734 | } | ||
| 2735 | 894 | ||
| 2736 | regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, | 895 | std::string FSqrt(Operation operation) { |
| 2737 | 1, false, instr.generates_cc, 0, | 896 | return GenerateUnary(operation, "sqrt", Type::Float, Type::Float, false); |
| 2738 | instr.conversion.dest_size); | 897 | } |
| 2739 | break; | ||
| 2740 | } | ||
| 2741 | default: { | ||
| 2742 | UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); | ||
| 2743 | } | ||
| 2744 | } | ||
| 2745 | break; | ||
| 2746 | } | ||
| 2747 | case OpCode::Type::Memory: { | ||
| 2748 | switch (opcode->get().GetId()) { | ||
| 2749 | case OpCode::Id::LD_A: { | ||
| 2750 | // Note: Shouldn't this be interp mode flat? As in no interpolation made. | ||
| 2751 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | ||
| 2752 | "Indirect attribute loads are not supported"); | ||
| 2753 | UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, | ||
| 2754 | "Unaligned attribute loads are not supported"); | ||
| 2755 | |||
| 2756 | Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, | ||
| 2757 | Tegra::Shader::IpaSampleMode::Default}; | ||
| 2758 | |||
| 2759 | u64 next_element = instr.attribute.fmt20.element; | ||
| 2760 | u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); | ||
| 2761 | |||
| 2762 | const auto LoadNextElement = [&](u32 reg_offset) { | ||
| 2763 | regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element, | ||
| 2764 | static_cast<Attribute::Index>(next_index), | ||
| 2765 | input_mode, instr.gpr39.Value()); | ||
| 2766 | |||
| 2767 | // Load the next attribute element into the following register. If the element | ||
| 2768 | // to load goes beyond the vec4 size, load the first element of the next | ||
| 2769 | // attribute. | ||
| 2770 | next_element = (next_element + 1) % 4; | ||
| 2771 | next_index = next_index + (next_element == 0 ? 1 : 0); | ||
| 2772 | }; | ||
| 2773 | |||
| 2774 | const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; | ||
| 2775 | for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { | ||
| 2776 | LoadNextElement(reg_offset); | ||
| 2777 | } | ||
| 2778 | break; | ||
| 2779 | } | ||
| 2780 | case OpCode::Id::LD_C: { | ||
| 2781 | UNIMPLEMENTED_IF(instr.ld_c.unknown != 0); | ||
| 2782 | |||
| 2783 | const auto scope = shader.Scope(); | ||
| 2784 | |||
| 2785 | shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + | ||
| 2786 | " / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);"); | ||
| 2787 | |||
| 2788 | const std::string op_a = | ||
| 2789 | regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, "index", | ||
| 2790 | GLSLRegister::Type::Float); | ||
| 2791 | |||
| 2792 | switch (instr.ld_c.type.Value()) { | ||
| 2793 | case Tegra::Shader::UniformType::Single: | ||
| 2794 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); | ||
| 2795 | break; | ||
| 2796 | |||
| 2797 | case Tegra::Shader::UniformType::Double: { | ||
| 2798 | const std::string op_b = | ||
| 2799 | regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, | ||
| 2800 | "index", GLSLRegister::Type::Float); | ||
| 2801 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); | ||
| 2802 | regs.SetRegisterToFloat(instr.gpr0.Value() + 1, 0, op_b, 1, 1); | ||
| 2803 | break; | ||
| 2804 | } | ||
| 2805 | default: | ||
| 2806 | UNIMPLEMENTED_MSG("Unhandled type: {}", | ||
| 2807 | static_cast<unsigned>(instr.ld_c.type.Value())); | ||
| 2808 | } | ||
| 2809 | break; | ||
| 2810 | } | ||
| 2811 | case OpCode::Id::LD_L: { | ||
| 2812 | UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}", | ||
| 2813 | static_cast<unsigned>(instr.ld_l.unknown.Value())); | ||
| 2814 | 898 | ||
| 2815 | const auto scope = shader.Scope(); | 899 | std::string FRoundEven(Operation operation) { |
| 900 | return GenerateUnary(operation, "roundEven", Type::Float, Type::Float, false); | ||
| 901 | } | ||
| 2816 | 902 | ||
| 2817 | std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " + | 903 | std::string FFloor(Operation operation) { |
| 2818 | std::to_string(instr.smem_imm.Value()) + ')'; | 904 | return GenerateUnary(operation, "floor", Type::Float, Type::Float, false); |
| 905 | } | ||
| 2819 | 906 | ||
| 2820 | shader.AddLine("uint index = (" + op + " / 4);"); | 907 | std::string FCeil(Operation operation) { |
| 908 | return GenerateUnary(operation, "ceil", Type::Float, Type::Float, false); | ||
| 909 | } | ||
| 2821 | 910 | ||
| 2822 | const std::string op_a = regs.GetLocalMemoryAsFloat("index"); | 911 | std::string FTrunc(Operation operation) { |
| 912 | return GenerateUnary(operation, "trunc", Type::Float, Type::Float, false); | ||
| 913 | } | ||
| 2823 | 914 | ||
| 2824 | switch (instr.ldst_sl.type.Value()) { | 915 | template <Type type> |
| 2825 | case Tegra::Shader::StoreType::Bytes32: | 916 | std::string FCastInteger(Operation operation) { |
| 2826 | regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); | 917 | return GenerateUnary(operation, "float", Type::Float, type, false); |
| 2827 | break; | 918 | } |
| 2828 | default: | ||
| 2829 | UNIMPLEMENTED_MSG("LD_L Unhandled type: {}", | ||
| 2830 | static_cast<unsigned>(instr.ldst_sl.type.Value())); | ||
| 2831 | } | ||
| 2832 | break; | ||
| 2833 | } | ||
| 2834 | case OpCode::Id::ST_A: { | ||
| 2835 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | ||
| 2836 | "Indirect attribute loads are not supported"); | ||
| 2837 | UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, | ||
| 2838 | "Unaligned attribute loads are not supported"); | ||
| 2839 | |||
| 2840 | u64 next_element = instr.attribute.fmt20.element; | ||
| 2841 | u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); | ||
| 2842 | |||
| 2843 | const auto StoreNextElement = [&](u32 reg_offset) { | ||
| 2844 | regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index), | ||
| 2845 | next_element, instr.gpr0.Value() + reg_offset, | ||
| 2846 | instr.gpr39.Value()); | ||
| 2847 | |||
| 2848 | // Load the next attribute element into the following register. If the element | ||
| 2849 | // to load goes beyond the vec4 size, load the first element of the next | ||
| 2850 | // attribute. | ||
| 2851 | next_element = (next_element + 1) % 4; | ||
| 2852 | next_index = next_index + (next_element == 0 ? 1 : 0); | ||
| 2853 | }; | ||
| 2854 | |||
| 2855 | const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; | ||
| 2856 | for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { | ||
| 2857 | StoreNextElement(reg_offset); | ||
| 2858 | } | ||
| 2859 | 919 | ||
| 2860 | break; | 920 | std::string ICastFloat(Operation operation) { |
| 2861 | } | 921 | return GenerateUnary(operation, "int", Type::Int, Type::Float, false); |
| 2862 | case OpCode::Id::ST_L: { | 922 | } |
| 2863 | UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}", | ||
| 2864 | static_cast<unsigned>(instr.st_l.unknown.Value())); | ||
| 2865 | 923 | ||
| 2866 | const auto scope = shader.Scope(); | 924 | std::string ICastUnsigned(Operation operation) { |
| 925 | return GenerateUnary(operation, "int", Type::Int, Type::Uint, false); | ||
| 926 | } | ||
| 2867 | 927 | ||
| 2868 | std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " + | 928 | template <Type type> |
| 2869 | std::to_string(instr.smem_imm.Value()) + ')'; | 929 | std::string LogicalShiftLeft(Operation operation) { |
| 930 | return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint); | ||
| 931 | } | ||
| 2870 | 932 | ||
| 2871 | shader.AddLine("uint index = (" + op + " / 4);"); | 933 | std::string ILogicalShiftRight(Operation operation) { |
| 934 | const std::string op_a = VisitOperand(operation, 0, Type::Uint); | ||
| 935 | const std::string op_b = VisitOperand(operation, 1, Type::Uint); | ||
| 2872 | 936 | ||
| 2873 | switch (instr.ldst_sl.type.Value()) { | 937 | return ApplyPrecise(operation, |
| 2874 | case Tegra::Shader::StoreType::Bytes32: | 938 | BitwiseCastResult("int(" + op_a + " >> " + op_b + ')', Type::Int)); |
| 2875 | regs.SetLocalMemoryAsFloat("index", regs.GetRegisterAsFloat(instr.gpr0)); | 939 | } |
| 2876 | break; | ||
| 2877 | default: | ||
| 2878 | UNIMPLEMENTED_MSG("ST_L Unhandled type: {}", | ||
| 2879 | static_cast<unsigned>(instr.ldst_sl.type.Value())); | ||
| 2880 | } | ||
| 2881 | break; | ||
| 2882 | } | ||
| 2883 | case OpCode::Id::TEX: { | ||
| 2884 | Tegra::Shader::TextureType texture_type{instr.tex.texture_type}; | ||
| 2885 | const bool is_array = instr.tex.array != 0; | ||
| 2886 | const bool depth_compare = | ||
| 2887 | instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); | ||
| 2888 | const auto process_mode = instr.tex.GetTextureProcessMode(); | ||
| 2889 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 2890 | "NODEP is not implemented"); | ||
| 2891 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), | ||
| 2892 | "AOFFI is not implemented"); | ||
| 2893 | |||
| 2894 | const auto [coord, texture] = | ||
| 2895 | GetTEXCode(instr, texture_type, process_mode, depth_compare, is_array); | ||
| 2896 | |||
| 2897 | const auto scope = shader.Scope(); | ||
| 2898 | shader.AddLine(coord); | ||
| 2899 | |||
| 2900 | if (depth_compare) { | ||
| 2901 | regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1); | ||
| 2902 | } else { | ||
| 2903 | shader.AddLine("vec4 texture_tmp = " + texture + ';'); | ||
| 2904 | std::size_t dest_elem{}; | ||
| 2905 | for (std::size_t elem = 0; elem < 4; ++elem) { | ||
| 2906 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 2907 | // Skip disabled components | ||
| 2908 | continue; | ||
| 2909 | } | ||
| 2910 | regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, false, | ||
| 2911 | dest_elem); | ||
| 2912 | ++dest_elem; | ||
| 2913 | } | ||
| 2914 | } | ||
| 2915 | break; | ||
| 2916 | } | ||
| 2917 | case OpCode::Id::TEXS: { | ||
| 2918 | Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()}; | ||
| 2919 | const bool is_array{instr.texs.IsArrayTexture()}; | ||
| 2920 | const bool depth_compare = | ||
| 2921 | instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); | ||
| 2922 | const auto process_mode = instr.texs.GetTextureProcessMode(); | ||
| 2923 | 940 | ||
| 2924 | UNIMPLEMENTED_IF_MSG(instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | 941 | std::string IArithmeticShiftRight(Operation operation) { |
| 2925 | "NODEP is not implemented"); | 942 | return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint); |
| 943 | } | ||
| 2926 | 944 | ||
| 2927 | const auto scope = shader.Scope(); | 945 | template <Type type> |
| 946 | std::string BitwiseAnd(Operation operation) { | ||
| 947 | return GenerateBinaryInfix(operation, "&", type, type, type); | ||
| 948 | } | ||
| 2928 | 949 | ||
| 2929 | auto [coord, texture] = | 950 | template <Type type> |
| 2930 | GetTEXSCode(instr, texture_type, process_mode, depth_compare, is_array); | 951 | std::string BitwiseOr(Operation operation) { |
| 952 | return GenerateBinaryInfix(operation, "|", type, type, type); | ||
| 953 | } | ||
| 2931 | 954 | ||
| 2932 | shader.AddLine(coord); | 955 | template <Type type> |
| 956 | std::string BitwiseXor(Operation operation) { | ||
| 957 | return GenerateBinaryInfix(operation, "^", type, type, type); | ||
| 958 | } | ||
| 2933 | 959 | ||
| 2934 | if (depth_compare) { | 960 | template <Type type> |
| 2935 | texture = "vec4(" + texture + ')'; | 961 | std::string BitwiseNot(Operation operation) { |
| 2936 | } | 962 | return GenerateUnary(operation, "~", type, type, false); |
| 2937 | shader.AddLine("vec4 texture_tmp = " + texture + ';'); | 963 | } |
| 2938 | 964 | ||
| 2939 | if (instr.texs.fp32_flag) { | 965 | std::string UCastFloat(Operation operation) { |
| 2940 | WriteTexsInstructionFloat(instr, "texture_tmp"); | 966 | return GenerateUnary(operation, "uint", Type::Uint, Type::Float, false); |
| 2941 | } else { | 967 | } |
| 2942 | WriteTexsInstructionHalfFloat(instr, "texture_tmp"); | ||
| 2943 | } | ||
| 2944 | break; | ||
| 2945 | } | ||
| 2946 | case OpCode::Id::TLDS: { | ||
| 2947 | const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; | ||
| 2948 | const bool is_array{instr.tlds.IsArrayTexture()}; | ||
| 2949 | 968 | ||
| 2950 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | 969 | std::string UCastSigned(Operation operation) { |
| 2951 | "NODEP is not implemented"); | 970 | return GenerateUnary(operation, "uint", Type::Uint, Type::Int, false); |
| 2952 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), | 971 | } |
| 2953 | "AOFFI is not implemented"); | ||
| 2954 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::MZ), | ||
| 2955 | "MZ is not implemented"); | ||
| 2956 | 972 | ||
| 2957 | const auto [coord, texture] = GetTLDSCode(instr, texture_type, is_array); | 973 | std::string UShiftRight(Operation operation) { |
| 974 | return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint); | ||
| 975 | } | ||
| 2958 | 976 | ||
| 2959 | const auto scope = shader.Scope(); | 977 | template <Type type> |
| 978 | std::string BitfieldInsert(Operation operation) { | ||
| 979 | return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int, | ||
| 980 | Type::Int); | ||
| 981 | } | ||
| 2960 | 982 | ||
| 2961 | shader.AddLine(coord); | 983 | template <Type type> |
| 2962 | shader.AddLine("vec4 texture_tmp = " + texture + ';'); | 984 | std::string BitfieldExtract(Operation operation) { |
| 2963 | WriteTexsInstructionFloat(instr, "texture_tmp"); | 985 | return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int); |
| 2964 | break; | 986 | } |
| 2965 | } | ||
| 2966 | case OpCode::Id::TLD4: { | ||
| 2967 | |||
| 2968 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 2969 | "NODEP is not implemented"); | ||
| 2970 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), | ||
| 2971 | "AOFFI is not implemented"); | ||
| 2972 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), | ||
| 2973 | "NDV is not implemented"); | ||
| 2974 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::PTP), | ||
| 2975 | "PTP is not implemented"); | ||
| 2976 | |||
| 2977 | auto texture_type = instr.tld4.texture_type.Value(); | ||
| 2978 | const bool depth_compare = | ||
| 2979 | instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); | ||
| 2980 | const bool is_array = instr.tld4.array != 0; | ||
| 2981 | |||
| 2982 | const auto [coord, texture] = | ||
| 2983 | GetTLD4Code(instr, texture_type, depth_compare, is_array); | ||
| 2984 | |||
| 2985 | const auto scope = shader.Scope(); | ||
| 2986 | |||
| 2987 | shader.AddLine(coord); | ||
| 2988 | std::size_t dest_elem{}; | ||
| 2989 | |||
| 2990 | shader.AddLine("vec4 texture_tmp = " + texture + ';'); | ||
| 2991 | for (std::size_t elem = 0; elem < 4; ++elem) { | ||
| 2992 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 2993 | // Skip disabled components | ||
| 2994 | continue; | ||
| 2995 | } | ||
| 2996 | regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, false, | ||
| 2997 | dest_elem); | ||
| 2998 | ++dest_elem; | ||
| 2999 | } | ||
| 3000 | break; | ||
| 3001 | } | ||
| 3002 | case OpCode::Id::TLD4S: { | ||
| 3003 | UNIMPLEMENTED_IF_MSG( | ||
| 3004 | instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 3005 | "NODEP is not implemented"); | ||
| 3006 | UNIMPLEMENTED_IF_MSG( | ||
| 3007 | instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), | ||
| 3008 | "AOFFI is not implemented"); | ||
| 3009 | 987 | ||
| 3010 | const auto scope = shader.Scope(); | 988 | template <Type type> |
| 989 | std::string BitCount(Operation operation) { | ||
| 990 | return GenerateUnary(operation, "bitCount", type, type, false); | ||
| 991 | } | ||
| 3011 | 992 | ||
| 3012 | std::string coords; | 993 | std::string HNegate(Operation operation) { |
| 994 | const auto GetNegate = [&](std::size_t index) -> std::string { | ||
| 995 | return VisitOperand(operation, index, Type::Bool) + " ? -1 : 1"; | ||
| 996 | }; | ||
| 997 | const std::string value = '(' + VisitOperand(operation, 0, Type::HalfFloat) + " * vec2(" + | ||
| 998 | GetNegate(1) + ", " + GetNegate(2) + "))"; | ||
| 999 | return BitwiseCastResult(value, Type::HalfFloat); | ||
| 1000 | } | ||
| 3013 | 1001 | ||
| 3014 | const bool depth_compare = | 1002 | std::string HMergeF32(Operation operation) { |
| 3015 | instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC); | 1003 | return "float(toHalf2(" + Visit(operation[0]) + ")[0])"; |
| 1004 | } | ||
| 3016 | 1005 | ||
| 3017 | const std::string sampler = GetSampler( | 1006 | std::string HMergeH0(Operation operation) { |
| 3018 | instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare); | 1007 | return "fromHalf2(vec2(toHalf2(" + Visit(operation[0]) + ")[1], toHalf2(" + |
| 1008 | Visit(operation[1]) + ")[0]))"; | ||
| 1009 | } | ||
| 3019 | 1010 | ||
| 3020 | const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); | 1011 | std::string HMergeH1(Operation operation) { |
| 3021 | coords = "vec2 coords = vec2(" + op_a + ", "; | 1012 | return "fromHalf2(vec2(toHalf2(" + Visit(operation[0]) + ")[0], toHalf2(" + |
| 3022 | std::string texture = "textureGather(" + sampler + ", coords, "; | 1013 | Visit(operation[1]) + ")[1]))"; |
| 1014 | } | ||
| 3023 | 1015 | ||
| 3024 | if (!depth_compare) { | 1016 | std::string HPack2(Operation operation) { |
| 3025 | const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); | 1017 | return "utof(packHalf2x16(vec2(" + Visit(operation[0]) + ", " + Visit(operation[1]) + ")))"; |
| 3026 | coords += op_b + ");"; | 1018 | } |
| 3027 | texture += std::to_string(instr.tld4s.component) + ')'; | ||
| 3028 | } else { | ||
| 3029 | const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | ||
| 3030 | const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20); | ||
| 3031 | coords += op_b + ");"; | ||
| 3032 | texture += op_c + ')'; | ||
| 3033 | } | ||
| 3034 | shader.AddLine(coords); | ||
| 3035 | shader.AddLine("vec4 texture_tmp = " + texture + ';'); | ||
| 3036 | WriteTexsInstructionFloat(instr, "texture_tmp"); | ||
| 3037 | break; | ||
| 3038 | } | ||
| 3039 | case OpCode::Id::TXQ: { | ||
| 3040 | UNIMPLEMENTED_IF_MSG(instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 3041 | "NODEP is not implemented"); | ||
| 3042 | |||
| 3043 | const auto scope = shader.Scope(); | ||
| 3044 | |||
| 3045 | // TODO: The new commits on the texture refactor, change the way samplers work. | ||
| 3046 | // Sadly, not all texture instructions specify the type of texture their sampler | ||
| 3047 | // uses. This must be fixed at a later instance. | ||
| 3048 | const std::string sampler = | ||
| 3049 | GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); | ||
| 3050 | switch (instr.txq.query_type) { | ||
| 3051 | case Tegra::Shader::TextureQueryType::Dimension: { | ||
| 3052 | const std::string texture = "textureSize(" + sampler + ", " + | ||
| 3053 | regs.GetRegisterAsInteger(instr.gpr8) + ')'; | ||
| 3054 | const std::string mip_level = "textureQueryLevels(" + sampler + ')'; | ||
| 3055 | shader.AddLine("ivec2 sizes = " + texture + ';'); | ||
| 3056 | |||
| 3057 | regs.SetRegisterToInteger(instr.gpr0.Value() + 0, true, 0, "sizes.x", 1, 1); | ||
| 3058 | regs.SetRegisterToInteger(instr.gpr0.Value() + 1, true, 0, "sizes.y", 1, 1); | ||
| 3059 | regs.SetRegisterToInteger(instr.gpr0.Value() + 2, true, 0, "0", 1, 1); | ||
| 3060 | regs.SetRegisterToInteger(instr.gpr0.Value() + 3, true, 0, mip_level, 1, 1); | ||
| 3061 | break; | ||
| 3062 | } | ||
| 3063 | default: { | ||
| 3064 | UNIMPLEMENTED_MSG("Unhandled texture query type: {}", | ||
| 3065 | static_cast<u32>(instr.txq.query_type.Value())); | ||
| 3066 | } | ||
| 3067 | } | ||
| 3068 | break; | ||
| 3069 | } | ||
| 3070 | case OpCode::Id::TMML: { | ||
| 3071 | UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||
| 3072 | "NODEP is not implemented"); | ||
| 3073 | UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), | ||
| 3074 | "NDV is not implemented"); | ||
| 3075 | |||
| 3076 | const std::string x = regs.GetRegisterAsFloat(instr.gpr8); | ||
| 3077 | const bool is_array = instr.tmml.array != 0; | ||
| 3078 | auto texture_type = instr.tmml.texture_type.Value(); | ||
| 3079 | const std::string sampler = | ||
| 3080 | GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 3081 | |||
| 3082 | const auto scope = shader.Scope(); | ||
| 3083 | |||
| 3084 | // TODO: Add coordinates for different samplers once other texture types are | ||
| 3085 | // implemented. | ||
| 3086 | switch (texture_type) { | ||
| 3087 | case Tegra::Shader::TextureType::Texture1D: { | ||
| 3088 | shader.AddLine("float coords = " + x + ';'); | ||
| 3089 | break; | ||
| 3090 | } | ||
| 3091 | case Tegra::Shader::TextureType::Texture2D: { | ||
| 3092 | const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | ||
| 3093 | shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); | ||
| 3094 | break; | ||
| 3095 | } | ||
| 3096 | default: | ||
| 3097 | UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type)); | ||
| 3098 | 1019 | ||
| 3099 | // Fallback to interpreting as a 2D texture for now | 1020 | template <Type type> |
| 3100 | const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | 1021 | std::string LogicalLessThan(Operation operation) { |
| 3101 | shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");"); | 1022 | return GenerateBinaryInfix(operation, "<", Type::Bool, type, type); |
| 3102 | texture_type = Tegra::Shader::TextureType::Texture2D; | 1023 | } |
| 3103 | } | ||
| 3104 | 1024 | ||
| 3105 | const std::string texture = "textureQueryLod(" + sampler + ", coords)"; | 1025 | template <Type type> |
| 3106 | shader.AddLine("vec2 tmp = " + texture + " * vec2(256.0, 256.0);"); | 1026 | std::string LogicalEqual(Operation operation) { |
| 1027 | return GenerateBinaryInfix(operation, "==", Type::Bool, type, type); | ||
| 1028 | } | ||
| 3107 | 1029 | ||
| 3108 | regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(tmp.y)", 1, 1); | 1030 | template <Type type> |
| 3109 | regs.SetRegisterToInteger(instr.gpr0.Value() + 1, false, 0, "uint(tmp.x)", 1, 1); | 1031 | std::string LogicalLessEqual(Operation operation) { |
| 3110 | break; | 1032 | return GenerateBinaryInfix(operation, "<=", Type::Bool, type, type); |
| 3111 | } | 1033 | } |
| 3112 | default: { | ||
| 3113 | UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); | ||
| 3114 | } | ||
| 3115 | } | ||
| 3116 | break; | ||
| 3117 | } | ||
| 3118 | case OpCode::Type::FloatSetPredicate: { | ||
| 3119 | const std::string op_a = | ||
| 3120 | GetOperandAbsNeg(regs.GetRegisterAsFloat(instr.gpr8), instr.fsetp.abs_a != 0, | ||
| 3121 | instr.fsetp.neg_a != 0); | ||
| 3122 | 1034 | ||
| 3123 | std::string op_b; | 1035 | template <Type type> |
| 1036 | std::string LogicalGreaterThan(Operation operation) { | ||
| 1037 | return GenerateBinaryInfix(operation, ">", Type::Bool, type, type); | ||
| 1038 | } | ||
| 3124 | 1039 | ||
| 3125 | if (instr.is_b_imm) { | 1040 | template <Type type> |
| 3126 | op_b += '(' + GetImmediate19(instr) + ')'; | 1041 | std::string LogicalNotEqual(Operation operation) { |
| 3127 | } else { | 1042 | return GenerateBinaryInfix(operation, "!=", Type::Bool, type, type); |
| 3128 | if (instr.is_b_gpr) { | 1043 | } |
| 3129 | op_b += regs.GetRegisterAsFloat(instr.gpr20); | ||
| 3130 | } else { | ||
| 3131 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 3132 | GLSLRegister::Type::Float); | ||
| 3133 | } | ||
| 3134 | } | ||
| 3135 | 1044 | ||
| 3136 | if (instr.fsetp.abs_b) { | 1045 | template <Type type> |
| 3137 | op_b = "abs(" + op_b + ')'; | 1046 | std::string LogicalGreaterEqual(Operation operation) { |
| 3138 | } | 1047 | return GenerateBinaryInfix(operation, ">=", Type::Bool, type, type); |
| 1048 | } | ||
| 3139 | 1049 | ||
| 3140 | // We can't use the constant predicate as destination. | 1050 | std::string LogicalFIsNan(Operation operation) { |
| 3141 | ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | 1051 | return GenerateUnary(operation, "isnan", Type::Bool, Type::Float, false); |
| 1052 | } | ||
| 3142 | 1053 | ||
| 3143 | const std::string second_pred = | 1054 | std::string LogicalAssign(Operation operation) { |
| 3144 | GetPredicateCondition(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); | 1055 | const Node dest = operation[0]; |
| 1056 | const Node src = operation[1]; | ||
| 3145 | 1057 | ||
| 3146 | const std::string combiner = GetPredicateCombiner(instr.fsetp.op); | 1058 | std::string target; |
| 3147 | 1059 | ||
| 3148 | const std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b); | 1060 | if (const auto pred = std::get_if<PredicateNode>(dest)) { |
| 3149 | // Set the primary predicate to the result of Predicate OP SecondPredicate | 1061 | ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); |
| 3150 | SetPredicate(instr.fsetp.pred3, | ||
| 3151 | '(' + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3152 | 1062 | ||
| 3153 | if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | 1063 | const auto index = pred->GetIndex(); |
| 3154 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | 1064 | switch (index) { |
| 3155 | // if enabled | 1065 | case Tegra::Shader::Pred::NeverExecute: |
| 3156 | SetPredicate(instr.fsetp.pred0, | 1066 | case Tegra::Shader::Pred::UnusedIndex: |
| 3157 | "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); | 1067 | // Writing to these predicates is a no-op |
| 1068 | return {}; | ||
| 3158 | } | 1069 | } |
| 3159 | break; | 1070 | target = GetPredicate(index); |
| 1071 | } else if (const auto flag = std::get_if<InternalFlagNode>(dest)) { | ||
| 1072 | target = GetInternalFlag(flag->GetFlag()); | ||
| 3160 | } | 1073 | } |
| 3161 | case OpCode::Type::IntegerSetPredicate: { | ||
| 3162 | const std::string op_a = | ||
| 3163 | regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed); | ||
| 3164 | std::string op_b; | ||
| 3165 | 1074 | ||
| 3166 | if (instr.is_b_imm) { | 1075 | code.AddLine(target + " = " + Visit(src) + ';'); |
| 3167 | op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')'; | 1076 | return {}; |
| 3168 | } else { | 1077 | } |
| 3169 | if (instr.is_b_gpr) { | ||
| 3170 | op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.isetp.is_signed); | ||
| 3171 | } else { | ||
| 3172 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 3173 | GLSLRegister::Type::Integer); | ||
| 3174 | } | ||
| 3175 | } | ||
| 3176 | |||
| 3177 | // We can't use the constant predicate as destination. | ||
| 3178 | ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 3179 | 1078 | ||
| 3180 | const std::string second_pred = | 1079 | std::string LogicalAnd(Operation operation) { |
| 3181 | GetPredicateCondition(instr.isetp.pred39, instr.isetp.neg_pred != 0); | 1080 | return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool); |
| 1081 | } | ||
| 3182 | 1082 | ||
| 3183 | const std::string combiner = GetPredicateCombiner(instr.isetp.op); | 1083 | std::string LogicalOr(Operation operation) { |
| 1084 | return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool); | ||
| 1085 | } | ||
| 3184 | 1086 | ||
| 3185 | const std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b); | 1087 | std::string LogicalXor(Operation operation) { |
| 3186 | // Set the primary predicate to the result of Predicate OP SecondPredicate | 1088 | return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool); |
| 3187 | SetPredicate(instr.isetp.pred3, | 1089 | } |
| 3188 | '(' + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3189 | 1090 | ||
| 3190 | if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | 1091 | std::string LogicalNegate(Operation operation) { |
| 3191 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | 1092 | return GenerateUnary(operation, "!", Type::Bool, Type::Bool, false); |
| 3192 | // if enabled | 1093 | } |
| 3193 | SetPredicate(instr.isetp.pred0, | ||
| 3194 | "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3195 | } | ||
| 3196 | break; | ||
| 3197 | } | ||
| 3198 | case OpCode::Type::HalfSetPredicate: { | ||
| 3199 | UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); | ||
| 3200 | |||
| 3201 | const std::string op_a = | ||
| 3202 | GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hsetp2.type_a, | ||
| 3203 | instr.hsetp2.abs_a, instr.hsetp2.negate_a); | ||
| 3204 | |||
| 3205 | const std::string op_b = [&]() { | ||
| 3206 | switch (opcode->get().GetId()) { | ||
| 3207 | case OpCode::Id::HSETP2_R: | ||
| 3208 | return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false), | ||
| 3209 | instr.hsetp2.type_b, instr.hsetp2.abs_a, | ||
| 3210 | instr.hsetp2.negate_b); | ||
| 3211 | default: | ||
| 3212 | UNREACHABLE(); | ||
| 3213 | return std::string("vec2(0)"); | ||
| 3214 | } | ||
| 3215 | }(); | ||
| 3216 | 1094 | ||
| 3217 | // We can't use the constant predicate as destination. | 1095 | std::string LogicalPick2(Operation operation) { |
| 3218 | ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); | 1096 | const std::string pair = VisitOperand(operation, 0, Type::Bool2); |
| 1097 | return pair + '[' + VisitOperand(operation, 1, Type::Uint) + ']'; | ||
| 1098 | } | ||
| 3219 | 1099 | ||
| 3220 | const std::string second_pred = | 1100 | std::string LogicalAll2(Operation operation) { |
| 3221 | GetPredicateCondition(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); | 1101 | return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); |
| 1102 | } | ||
| 3222 | 1103 | ||
| 3223 | const std::string combiner = GetPredicateCombiner(instr.hsetp2.op); | 1104 | std::string LogicalAny2(Operation operation) { |
| 1105 | return GenerateUnary(operation, "any", Type::Bool, Type::Bool2); | ||
| 1106 | } | ||
| 3224 | 1107 | ||
| 3225 | const std::string component_combiner = instr.hsetp2.h_and ? "&&" : "||"; | 1108 | std::string Logical2HLessThan(Operation operation) { |
| 3226 | const std::string predicate = | 1109 | return GenerateBinaryCall(operation, "lessThan", Type::Bool2, Type::HalfFloat, |
| 3227 | '(' + GetPredicateComparison(instr.hsetp2.cond, op_a + ".x", op_b + ".x") + ' ' + | 1110 | Type::HalfFloat); |
| 3228 | component_combiner + ' ' + | 1111 | } |
| 3229 | GetPredicateComparison(instr.hsetp2.cond, op_a + ".y", op_b + ".y") + ')'; | ||
| 3230 | 1112 | ||
| 3231 | // Set the primary predicate to the result of Predicate OP SecondPredicate | 1113 | std::string Logical2HEqual(Operation operation) { |
| 3232 | SetPredicate(instr.hsetp2.pred3, | 1114 | return GenerateBinaryCall(operation, "equal", Type::Bool2, Type::HalfFloat, |
| 3233 | '(' + predicate + ") " + combiner + " (" + second_pred + ')'); | 1115 | Type::HalfFloat); |
| 1116 | } | ||
| 3234 | 1117 | ||
| 3235 | if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | 1118 | std::string Logical2HLessEqual(Operation operation) { |
| 3236 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | 1119 | return GenerateBinaryCall(operation, "lessThanEqual", Type::Bool2, Type::HalfFloat, |
| 3237 | // if enabled | 1120 | Type::HalfFloat); |
| 3238 | SetPredicate(instr.hsetp2.pred0, | 1121 | } |
| 3239 | "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3240 | } | ||
| 3241 | break; | ||
| 3242 | } | ||
| 3243 | case OpCode::Type::PredicateSetRegister: { | ||
| 3244 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 3245 | "Condition codes generation in PSET is partially implemented"); | ||
| 3246 | |||
| 3247 | const std::string op_a = | ||
| 3248 | GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0); | ||
| 3249 | const std::string op_b = | ||
| 3250 | GetPredicateCondition(instr.pset.pred29, instr.pset.neg_pred29 != 0); | ||
| 3251 | |||
| 3252 | const std::string second_pred = | ||
| 3253 | GetPredicateCondition(instr.pset.pred39, instr.pset.neg_pred39 != 0); | ||
| 3254 | |||
| 3255 | const std::string combiner = GetPredicateCombiner(instr.pset.op); | ||
| 3256 | |||
| 3257 | const std::string predicate = | ||
| 3258 | '(' + op_a + ") " + GetPredicateCombiner(instr.pset.cond) + " (" + op_b + ')'; | ||
| 3259 | const std::string result = '(' + predicate + ") " + combiner + " (" + second_pred + ')'; | ||
| 3260 | if (instr.pset.bf == 0) { | ||
| 3261 | const std::string value = '(' + result + ") ? 0xFFFFFFFF : 0"; | ||
| 3262 | regs.SetRegisterToInteger(instr.gpr0, false, 0, value, 1, 1, false, | ||
| 3263 | instr.generates_cc); | ||
| 3264 | } else { | ||
| 3265 | const std::string value = '(' + result + ") ? 1.0 : 0.0"; | ||
| 3266 | regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1, false, instr.generates_cc); | ||
| 3267 | } | ||
| 3268 | break; | ||
| 3269 | } | ||
| 3270 | case OpCode::Type::PredicateSetPredicate: { | ||
| 3271 | switch (opcode->get().GetId()) { | ||
| 3272 | case OpCode::Id::PSETP: { | ||
| 3273 | const std::string op_a = | ||
| 3274 | GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); | ||
| 3275 | const std::string op_b = | ||
| 3276 | GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); | ||
| 3277 | |||
| 3278 | // We can't use the constant predicate as destination. | ||
| 3279 | ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 3280 | |||
| 3281 | const std::string second_pred = | ||
| 3282 | GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); | ||
| 3283 | |||
| 3284 | const std::string combiner = GetPredicateCombiner(instr.psetp.op); | ||
| 3285 | |||
| 3286 | const std::string predicate = | ||
| 3287 | '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')'; | ||
| 3288 | |||
| 3289 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 3290 | SetPredicate(instr.psetp.pred3, | ||
| 3291 | '(' + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3292 | |||
| 3293 | if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 3294 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | ||
| 3295 | // if enabled | ||
| 3296 | SetPredicate(instr.psetp.pred0, | ||
| 3297 | "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3298 | } | ||
| 3299 | break; | ||
| 3300 | } | ||
| 3301 | case OpCode::Id::CSETP: { | ||
| 3302 | const std::string pred = | ||
| 3303 | GetPredicateCondition(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); | ||
| 3304 | const std::string combiner = GetPredicateCombiner(instr.csetp.op); | ||
| 3305 | const std::string condition_code = regs.GetConditionCode(instr.csetp.cc); | ||
| 3306 | if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 3307 | SetPredicate(instr.csetp.pred3, | ||
| 3308 | '(' + condition_code + ") " + combiner + " (" + pred + ')'); | ||
| 3309 | } | ||
| 3310 | if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 3311 | SetPredicate(instr.csetp.pred0, | ||
| 3312 | "!(" + condition_code + ") " + combiner + " (" + pred + ')'); | ||
| 3313 | } | ||
| 3314 | break; | ||
| 3315 | } | ||
| 3316 | default: { | ||
| 3317 | UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName()); | ||
| 3318 | } | ||
| 3319 | } | ||
| 3320 | break; | ||
| 3321 | } | ||
| 3322 | case OpCode::Type::RegisterSetPredicate: { | ||
| 3323 | UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr); | ||
| 3324 | 1122 | ||
| 3325 | const std::string apply_mask = [&]() { | 1123 | std::string Logical2HGreaterThan(Operation operation) { |
| 3326 | switch (opcode->get().GetId()) { | 1124 | return GenerateBinaryCall(operation, "greaterThan", Type::Bool2, Type::HalfFloat, |
| 3327 | case OpCode::Id::R2P_IMM: | 1125 | Type::HalfFloat); |
| 3328 | return std::to_string(instr.r2p.immediate_mask); | 1126 | } |
| 3329 | default: | ||
| 3330 | UNREACHABLE(); | ||
| 3331 | return std::to_string(instr.r2p.immediate_mask); | ||
| 3332 | } | ||
| 3333 | }(); | ||
| 3334 | const std::string mask = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + | ||
| 3335 | " >> " + std::to_string(instr.r2p.byte) + ')'; | ||
| 3336 | 1127 | ||
| 3337 | constexpr u64 programmable_preds = 7; | 1128 | std::string Logical2HNotEqual(Operation operation) { |
| 3338 | for (u64 pred = 0; pred < programmable_preds; ++pred) { | 1129 | return GenerateBinaryCall(operation, "notEqual", Type::Bool2, Type::HalfFloat, |
| 3339 | const auto shift = std::to_string(1 << pred); | 1130 | Type::HalfFloat); |
| 1131 | } | ||
| 3340 | 1132 | ||
| 3341 | shader.AddLine("if ((" + apply_mask + " & " + shift + ") != 0) {"); | 1133 | std::string Logical2HGreaterEqual(Operation operation) { |
| 3342 | ++shader.scope; | 1134 | return GenerateBinaryCall(operation, "greaterThanEqual", Type::Bool2, Type::HalfFloat, |
| 1135 | Type::HalfFloat); | ||
| 1136 | } | ||
| 3343 | 1137 | ||
| 3344 | SetPredicate(pred, '(' + mask + " & " + shift + ") != 0"); | 1138 | std::string F4Texture(Operation operation) { |
| 1139 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | ||
| 1140 | ASSERT(meta); | ||
| 3345 | 1141 | ||
| 3346 | --shader.scope; | 1142 | std::string expr = GenerateTexture(operation, "texture"); |
| 3347 | shader.AddLine('}'); | 1143 | if (meta->sampler.IsShadow()) { |
| 3348 | } | 1144 | expr = "vec4(" + expr + ')'; |
| 3349 | break; | ||
| 3350 | } | 1145 | } |
| 3351 | case OpCode::Type::FloatSet: { | 1146 | return expr + GetSwizzle(meta->element); |
| 3352 | const std::string op_a = GetOperandAbsNeg(regs.GetRegisterAsFloat(instr.gpr8), | 1147 | } |
| 3353 | instr.fset.abs_a != 0, instr.fset.neg_a != 0); | ||
| 3354 | |||
| 3355 | std::string op_b; | ||
| 3356 | |||
| 3357 | if (instr.is_b_imm) { | ||
| 3358 | const std::string imm = GetImmediate19(instr); | ||
| 3359 | op_b = imm; | ||
| 3360 | } else { | ||
| 3361 | if (instr.is_b_gpr) { | ||
| 3362 | op_b = regs.GetRegisterAsFloat(instr.gpr20); | ||
| 3363 | } else { | ||
| 3364 | op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 3365 | GLSLRegister::Type::Float); | ||
| 3366 | } | ||
| 3367 | } | ||
| 3368 | |||
| 3369 | op_b = GetOperandAbsNeg(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0); | ||
| 3370 | |||
| 3371 | // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the | ||
| 3372 | // condition is true, and to 0 otherwise. | ||
| 3373 | const std::string second_pred = | ||
| 3374 | GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0); | ||
| 3375 | |||
| 3376 | const std::string combiner = GetPredicateCombiner(instr.fset.op); | ||
| 3377 | 1148 | ||
| 3378 | const std::string predicate = "((" + | 1149 | std::string F4TextureLod(Operation operation) { |
| 3379 | GetPredicateComparison(instr.fset.cond, op_a, op_b) + | 1150 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 3380 | ") " + combiner + " (" + second_pred + "))"; | 1151 | ASSERT(meta); |
| 3381 | 1152 | ||
| 3382 | if (instr.fset.bf) { | 1153 | std::string expr = GenerateTexture(operation, "textureLod"); |
| 3383 | regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1, false, | 1154 | if (meta->sampler.IsShadow()) { |
| 3384 | instr.generates_cc); | 1155 | expr = "vec4(" + expr + ')'; |
| 3385 | } else { | ||
| 3386 | regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1, | ||
| 3387 | 1, false, instr.generates_cc); | ||
| 3388 | } | ||
| 3389 | break; | ||
| 3390 | } | 1156 | } |
| 3391 | case OpCode::Type::IntegerSet: { | 1157 | return expr + GetSwizzle(meta->element); |
| 3392 | const std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.iset.is_signed); | 1158 | } |
| 3393 | 1159 | ||
| 3394 | std::string op_b; | 1160 | std::string F4TextureGather(Operation operation) { |
| 1161 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | ||
| 1162 | ASSERT(meta); | ||
| 3395 | 1163 | ||
| 3396 | if (instr.is_b_imm) { | 1164 | return GenerateTexture(operation, "textureGather", !meta->sampler.IsShadow()) + |
| 3397 | op_b = std::to_string(instr.alu.GetSignedImm20_20()); | 1165 | GetSwizzle(meta->element); |
| 3398 | } else { | 1166 | } |
| 3399 | if (instr.is_b_gpr) { | ||
| 3400 | op_b = regs.GetRegisterAsInteger(instr.gpr20, 0, instr.iset.is_signed); | ||
| 3401 | } else { | ||
| 3402 | op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 3403 | GLSLRegister::Type::Integer); | ||
| 3404 | } | ||
| 3405 | } | ||
| 3406 | |||
| 3407 | // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the | ||
| 3408 | // condition is true, and to 0 otherwise. | ||
| 3409 | const std::string second_pred = | ||
| 3410 | GetPredicateCondition(instr.iset.pred39, instr.iset.neg_pred != 0); | ||
| 3411 | 1167 | ||
| 3412 | const std::string combiner = GetPredicateCombiner(instr.iset.op); | 1168 | std::string F4TextureQueryDimensions(Operation operation) { |
| 1169 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | ||
| 1170 | ASSERT(meta); | ||
| 3413 | 1171 | ||
| 3414 | const std::string predicate = "((" + | 1172 | const std::string sampler = GetSampler(meta->sampler); |
| 3415 | GetPredicateComparison(instr.iset.cond, op_a, op_b) + | 1173 | const std::string lod = VisitOperand(operation, 0, Type::Int); |
| 3416 | ") " + combiner + " (" + second_pred + "))"; | ||
| 3417 | 1174 | ||
| 3418 | if (instr.iset.bf) { | 1175 | switch (meta->element) { |
| 3419 | regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1); | 1176 | case 0: |
| 3420 | } else { | 1177 | case 1: |
| 3421 | regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1, | 1178 | return "textureSize(" + sampler + ", " + lod + ')' + GetSwizzle(meta->element); |
| 3422 | 1); | 1179 | case 2: |
| 3423 | } | 1180 | return "0"; |
| 3424 | break; | 1181 | case 3: |
| 1182 | return "textureQueryLevels(" + sampler + ')'; | ||
| 3425 | } | 1183 | } |
| 3426 | case OpCode::Type::HalfSet: { | 1184 | UNREACHABLE(); |
| 3427 | UNIMPLEMENTED_IF(instr.hset2.ftz != 0); | 1185 | return "0"; |
| 3428 | 1186 | } | |
| 3429 | const std::string op_a = | ||
| 3430 | GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hset2.type_a, | ||
| 3431 | instr.hset2.abs_a != 0, instr.hset2.negate_a != 0); | ||
| 3432 | |||
| 3433 | const std::string op_b = [&]() { | ||
| 3434 | switch (opcode->get().GetId()) { | ||
| 3435 | case OpCode::Id::HSET2_R: | ||
| 3436 | return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false), | ||
| 3437 | instr.hset2.type_b, instr.hset2.abs_b != 0, | ||
| 3438 | instr.hset2.negate_b != 0); | ||
| 3439 | default: | ||
| 3440 | UNREACHABLE(); | ||
| 3441 | return std::string("vec2(0)"); | ||
| 3442 | } | ||
| 3443 | }(); | ||
| 3444 | |||
| 3445 | const std::string second_pred = | ||
| 3446 | GetPredicateCondition(instr.hset2.pred39, instr.hset2.neg_pred != 0); | ||
| 3447 | |||
| 3448 | const std::string combiner = GetPredicateCombiner(instr.hset2.op); | ||
| 3449 | |||
| 3450 | // HSET2 operates on each half float in the pack. | ||
| 3451 | std::string result; | ||
| 3452 | for (int i = 0; i < 2; ++i) { | ||
| 3453 | const std::string float_value = i == 0 ? "0x00003c00" : "0x3c000000"; | ||
| 3454 | const std::string integer_value = i == 0 ? "0x0000ffff" : "0xffff0000"; | ||
| 3455 | const std::string value = instr.hset2.bf == 1 ? float_value : integer_value; | ||
| 3456 | 1187 | ||
| 3457 | const std::string comp = std::string(".") + "xy"[i]; | 1188 | std::string F4TextureQueryLod(Operation operation) { |
| 3458 | const std::string predicate = | 1189 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 3459 | "((" + GetPredicateComparison(instr.hset2.cond, op_a + comp, op_b + comp) + | 1190 | ASSERT(meta); |
| 3460 | ") " + combiner + " (" + second_pred + "))"; | ||
| 3461 | 1191 | ||
| 3462 | result += '(' + predicate + " ? " + value + " : 0)"; | 1192 | if (meta->element < 2) { |
| 3463 | if (i == 0) { | 1193 | return "itof(int((" + GenerateTexture(operation, "textureQueryLod") + " * vec2(256))" + |
| 3464 | result += " | "; | 1194 | GetSwizzle(meta->element) + "))"; |
| 3465 | } | ||
| 3466 | } | ||
| 3467 | regs.SetRegisterToInteger(instr.gpr0, false, 0, '(' + result + ')', 1, 1); | ||
| 3468 | break; | ||
| 3469 | } | 1195 | } |
| 3470 | case OpCode::Type::Xmad: { | 1196 | return "0"; |
| 3471 | UNIMPLEMENTED_IF(instr.xmad.sign_a); | 1197 | } |
| 3472 | UNIMPLEMENTED_IF(instr.xmad.sign_b); | ||
| 3473 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 3474 | "Condition codes generation in XMAD is partially implemented"); | ||
| 3475 | |||
| 3476 | std::string op_a{regs.GetRegisterAsInteger(instr.gpr8, 0, instr.xmad.sign_a)}; | ||
| 3477 | std::string op_b; | ||
| 3478 | std::string op_c; | ||
| 3479 | |||
| 3480 | // TODO(bunnei): Needs to be fixed once op_a or op_b is signed | ||
| 3481 | UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b); | ||
| 3482 | const bool is_signed{instr.xmad.sign_a == 1}; | ||
| 3483 | |||
| 3484 | bool is_merge{}; | ||
| 3485 | switch (opcode->get().GetId()) { | ||
| 3486 | case OpCode::Id::XMAD_CR: { | ||
| 3487 | is_merge = instr.xmad.merge_56; | ||
| 3488 | op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 3489 | instr.xmad.sign_b ? GLSLRegister::Type::Integer | ||
| 3490 | : GLSLRegister::Type::UnsignedInteger); | ||
| 3491 | op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed); | ||
| 3492 | break; | ||
| 3493 | } | ||
| 3494 | case OpCode::Id::XMAD_RR: { | ||
| 3495 | is_merge = instr.xmad.merge_37; | ||
| 3496 | op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.xmad.sign_b); | ||
| 3497 | op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed); | ||
| 3498 | break; | ||
| 3499 | } | ||
| 3500 | case OpCode::Id::XMAD_RC: { | ||
| 3501 | op_b += regs.GetRegisterAsInteger(instr.gpr39, 0, instr.xmad.sign_b); | ||
| 3502 | op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, | ||
| 3503 | is_signed ? GLSLRegister::Type::Integer | ||
| 3504 | : GLSLRegister::Type::UnsignedInteger); | ||
| 3505 | break; | ||
| 3506 | } | ||
| 3507 | case OpCode::Id::XMAD_IMM: { | ||
| 3508 | is_merge = instr.xmad.merge_37; | ||
| 3509 | op_b += std::to_string(instr.xmad.imm20_16); | ||
| 3510 | op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed); | ||
| 3511 | break; | ||
| 3512 | } | ||
| 3513 | default: { | ||
| 3514 | UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName()); | ||
| 3515 | } | ||
| 3516 | } | ||
| 3517 | 1198 | ||
| 3518 | // TODO(bunnei): Ensure this is right with signed operands | 1199 | std::string F4TexelFetch(Operation operation) { |
| 3519 | if (instr.xmad.high_a) { | 1200 | constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"}; |
| 3520 | op_a = "((" + op_a + ") >> 16)"; | 1201 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 3521 | } else { | 1202 | const auto count = static_cast<u32>(operation.GetOperandsCount()); |
| 3522 | op_a = "((" + op_a + ") & 0xFFFF)"; | 1203 | ASSERT(meta); |
| 3523 | } | ||
| 3524 | 1204 | ||
| 3525 | std::string src2 = '(' + op_b + ')'; // Preserve original source 2 | 1205 | std::string expr = "texelFetch("; |
| 3526 | if (instr.xmad.high_b) { | 1206 | expr += GetSampler(meta->sampler); |
| 3527 | op_b = '(' + src2 + " >> 16)"; | 1207 | expr += ", "; |
| 3528 | } else { | ||
| 3529 | op_b = '(' + src2 + " & 0xFFFF)"; | ||
| 3530 | } | ||
| 3531 | 1208 | ||
| 3532 | std::string product = '(' + op_a + " * " + op_b + ')'; | 1209 | expr += constructors[meta->coords_count - 1]; |
| 3533 | if (instr.xmad.product_shift_left) { | 1210 | expr += '('; |
| 3534 | product = '(' + product + " << 16)"; | 1211 | for (u32 i = 0; i < count; ++i) { |
| 3535 | } | 1212 | expr += VisitOperand(operation, i, Type::Int); |
| 3536 | 1213 | ||
| 3537 | switch (instr.xmad.mode) { | 1214 | if (i + 1 == meta->coords_count) { |
| 3538 | case Tegra::Shader::XmadMode::None: | 1215 | expr += ')'; |
| 3539 | break; | ||
| 3540 | case Tegra::Shader::XmadMode::CLo: | ||
| 3541 | op_c = "((" + op_c + ") & 0xFFFF)"; | ||
| 3542 | break; | ||
| 3543 | case Tegra::Shader::XmadMode::CHi: | ||
| 3544 | op_c = "((" + op_c + ") >> 16)"; | ||
| 3545 | break; | ||
| 3546 | case Tegra::Shader::XmadMode::CBcc: | ||
| 3547 | op_c = "((" + op_c + ") + (" + src2 + "<< 16))"; | ||
| 3548 | break; | ||
| 3549 | default: { | ||
| 3550 | UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", | ||
| 3551 | static_cast<u32>(instr.xmad.mode.Value())); | ||
| 3552 | } | ||
| 3553 | } | 1216 | } |
| 3554 | 1217 | if (i + 1 < count) { | |
| 3555 | std::string sum{'(' + product + " + " + op_c + ')'}; | 1218 | expr += ", "; |
| 3556 | if (is_merge) { | ||
| 3557 | sum = "((" + sum + " & 0xFFFF) | (" + src2 + "<< 16))"; | ||
| 3558 | } | 1219 | } |
| 3559 | |||
| 3560 | regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1, false, | ||
| 3561 | instr.generates_cc); | ||
| 3562 | break; | ||
| 3563 | } | 1220 | } |
| 3564 | default: { | 1221 | expr += ')'; |
| 3565 | switch (opcode->get().GetId()) { | 1222 | return expr + GetSwizzle(meta->element); |
| 3566 | case OpCode::Id::EXIT: { | 1223 | } |
| 3567 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | ||
| 3568 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, | ||
| 3569 | "EXIT condition code used: {}", static_cast<u32>(cc)); | ||
| 3570 | |||
| 3571 | if (stage == Maxwell3D::Regs::ShaderStage::Fragment) { | ||
| 3572 | EmitFragmentOutputsWrite(); | ||
| 3573 | } | ||
| 3574 | |||
| 3575 | switch (instr.flow.cond) { | ||
| 3576 | case Tegra::Shader::FlowCondition::Always: | ||
| 3577 | shader.AddLine("return true;"); | ||
| 3578 | if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { | ||
| 3579 | // If this is an unconditional exit then just end processing here, | ||
| 3580 | // otherwise we have to account for the possibility of the condition | ||
| 3581 | // not being met, so continue processing the next instruction. | ||
| 3582 | offset = PROGRAM_END - 1; | ||
| 3583 | } | ||
| 3584 | break; | ||
| 3585 | |||
| 3586 | case Tegra::Shader::FlowCondition::Fcsm_Tr: | ||
| 3587 | // TODO(bunnei): What is this used for? If we assume this conditon is not | ||
| 3588 | // satisifed, dual vertex shaders in Farming Simulator make more sense | ||
| 3589 | UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr"); | ||
| 3590 | break; | ||
| 3591 | |||
| 3592 | default: | ||
| 3593 | UNIMPLEMENTED_MSG("Unhandled flow condition: {}", | ||
| 3594 | static_cast<u32>(instr.flow.cond.Value())); | ||
| 3595 | } | ||
| 3596 | break; | ||
| 3597 | } | ||
| 3598 | case OpCode::Id::KIL: { | ||
| 3599 | UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always); | ||
| 3600 | 1224 | ||
| 3601 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | 1225 | std::string Branch(Operation operation) { |
| 3602 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, | 1226 | const auto target = std::get_if<ImmediateNode>(operation[0]); |
| 3603 | "KIL condition code used: {}", static_cast<u32>(cc)); | 1227 | UNIMPLEMENTED_IF(!target); |
| 3604 | 1228 | ||
| 3605 | // Enclose "discard" in a conditional, so that GLSL compilation does not complain | 1229 | code.AddLine(fmt::format("jmp_to = 0x{:x}u;", target->GetValue())); |
| 3606 | // about unexecuted instructions that may follow this. | 1230 | code.AddLine("break;"); |
| 3607 | shader.AddLine("if (true) {"); | 1231 | return {}; |
| 3608 | ++shader.scope; | 1232 | } |
| 3609 | shader.AddLine("discard;"); | ||
| 3610 | --shader.scope; | ||
| 3611 | shader.AddLine("}"); | ||
| 3612 | 1233 | ||
| 3613 | break; | 1234 | std::string PushFlowStack(Operation operation) { |
| 3614 | } | 1235 | const auto target = std::get_if<ImmediateNode>(operation[0]); |
| 3615 | case OpCode::Id::OUT_R: { | 1236 | UNIMPLEMENTED_IF(!target); |
| 3616 | UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex, | ||
| 3617 | "Stream buffer is not supported"); | ||
| 3618 | ASSERT_MSG(stage == Maxwell3D::Regs::ShaderStage::Geometry, | ||
| 3619 | "OUT is expected to be used in a geometry shader."); | ||
| 3620 | |||
| 3621 | if (instr.out.emit) { | ||
| 3622 | // gpr0 is used to store the next address. Hardware returns a pointer but | ||
| 3623 | // we just return the next index with a cyclic cap. | ||
| 3624 | const std::string current{regs.GetRegisterAsInteger(instr.gpr8, 0, false)}; | ||
| 3625 | const std::string next = "((" + current + " + 1" + ") % " + | ||
| 3626 | std::to_string(MAX_GEOMETRY_BUFFERS) + ')'; | ||
| 3627 | shader.AddLine("emit_vertex(" + current + ");"); | ||
| 3628 | regs.SetRegisterToInteger(instr.gpr0, false, 0, next, 1, 1); | ||
| 3629 | } | ||
| 3630 | if (instr.out.cut) { | ||
| 3631 | shader.AddLine("EndPrimitive();"); | ||
| 3632 | } | ||
| 3633 | 1237 | ||
| 3634 | break; | 1238 | code.AddLine(fmt::format("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue())); |
| 3635 | } | 1239 | return {}; |
| 3636 | case OpCode::Id::MOV_SYS: { | 1240 | } |
| 3637 | switch (instr.sys20) { | ||
| 3638 | case Tegra::Shader::SystemVariable::InvocationInfo: { | ||
| 3639 | LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); | ||
| 3640 | regs.SetRegisterToInteger(instr.gpr0, false, 0, "0u", 1, 1); | ||
| 3641 | break; | ||
| 3642 | } | ||
| 3643 | case Tegra::Shader::SystemVariable::Ydirection: { | ||
| 3644 | // Config pack's third value is Y_NEGATE's state. | ||
| 3645 | regs.SetRegisterToFloat(instr.gpr0, 0, "uintBitsToFloat(config_pack[2])", 1, 1); | ||
| 3646 | break; | ||
| 3647 | } | ||
| 3648 | default: { | ||
| 3649 | UNIMPLEMENTED_MSG("Unhandled system move: {}", | ||
| 3650 | static_cast<u32>(instr.sys20.Value())); | ||
| 3651 | } | ||
| 3652 | } | ||
| 3653 | break; | ||
| 3654 | } | ||
| 3655 | case OpCode::Id::ISBERD: { | ||
| 3656 | UNIMPLEMENTED_IF(instr.isberd.o != 0); | ||
| 3657 | UNIMPLEMENTED_IF(instr.isberd.skew != 0); | ||
| 3658 | UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None); | ||
| 3659 | UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None); | ||
| 3660 | ASSERT_MSG(stage == Maxwell3D::Regs::ShaderStage::Geometry, | ||
| 3661 | "ISBERD is expected to be used in a geometry shader."); | ||
| 3662 | LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete"); | ||
| 3663 | regs.SetRegisterToFloat(instr.gpr0, 0, regs.GetRegisterAsFloat(instr.gpr8), 1, 1); | ||
| 3664 | break; | ||
| 3665 | } | ||
| 3666 | case OpCode::Id::BRA: { | ||
| 3667 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 3668 | "BRA with constant buffers are not implemented"); | ||
| 3669 | |||
| 3670 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | ||
| 3671 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 3672 | if (cc != Tegra::Shader::ConditionCode::T) { | ||
| 3673 | const std::string condition_code = regs.GetConditionCode(cc); | ||
| 3674 | shader.AddLine("if (" + condition_code + "){"); | ||
| 3675 | shader.scope++; | ||
| 3676 | shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }"); | ||
| 3677 | shader.scope--; | ||
| 3678 | shader.AddLine('}'); | ||
| 3679 | } else { | ||
| 3680 | shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }"); | ||
| 3681 | } | ||
| 3682 | break; | ||
| 3683 | } | ||
| 3684 | case OpCode::Id::IPA: { | ||
| 3685 | const auto& attribute = instr.attribute.fmt28; | ||
| 3686 | const auto& reg = instr.gpr0; | ||
| 3687 | |||
| 3688 | Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(), | ||
| 3689 | instr.ipa.sample_mode.Value()}; | ||
| 3690 | regs.SetRegisterToInputAttibute(reg, attribute.element, attribute.index, | ||
| 3691 | input_mode); | ||
| 3692 | 1241 | ||
| 3693 | if (instr.ipa.saturate) { | 1242 | std::string PopFlowStack(Operation operation) { |
| 3694 | regs.SetRegisterToFloat(reg, 0, regs.GetRegisterAsFloat(reg), 1, 1, true); | 1243 | code.AddLine("jmp_to = flow_stack[--flow_stack_top];"); |
| 3695 | } | 1244 | code.AddLine("break;"); |
| 3696 | break; | 1245 | return {}; |
| 3697 | } | 1246 | } |
| 3698 | case OpCode::Id::SSY: { | ||
| 3699 | // The SSY opcode tells the GPU where to re-converge divergent execution paths, it | ||
| 3700 | // sets the target of the jump that the SYNC instruction will make. The SSY opcode | ||
| 3701 | // has a similar structure to the BRA opcode. | ||
| 3702 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 3703 | "Constant buffer flow is not supported"); | ||
| 3704 | |||
| 3705 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 3706 | EmitPushToFlowStack(target); | ||
| 3707 | break; | ||
| 3708 | } | ||
| 3709 | case OpCode::Id::PBK: { | ||
| 3710 | // PBK pushes to a stack the address where BRK will jump to. This shares stack with | ||
| 3711 | // SSY but using SYNC on a PBK address will kill the shader execution. We don't | ||
| 3712 | // emulate this because it's very unlikely a driver will emit such invalid shader. | ||
| 3713 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 3714 | "Constant buffer PBK is not supported"); | ||
| 3715 | |||
| 3716 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 3717 | EmitPushToFlowStack(target); | ||
| 3718 | break; | ||
| 3719 | } | ||
| 3720 | case OpCode::Id::SYNC: { | ||
| 3721 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | ||
| 3722 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, | ||
| 3723 | "SYNC condition code used: {}", static_cast<u32>(cc)); | ||
| 3724 | 1247 | ||
| 3725 | // The SYNC opcode jumps to the address previously set by the SSY opcode | 1248 | std::string Exit(Operation operation) { |
| 3726 | EmitPopFromFlowStack(); | 1249 | if (stage != ShaderStage::Fragment) { |
| 3727 | break; | 1250 | code.AddLine("return;"); |
| 1251 | return {}; | ||
| 1252 | } | ||
| 1253 | const auto& used_registers = ir.GetRegisters(); | ||
| 1254 | const auto SafeGetRegister = [&](u32 reg) -> std::string { | ||
| 1255 | // TODO(Rodrigo): Replace with contains once C++20 releases | ||
| 1256 | if (used_registers.find(reg) != used_registers.end()) { | ||
| 1257 | return GetRegister(reg); | ||
| 3728 | } | 1258 | } |
| 3729 | case OpCode::Id::BRK: { | 1259 | return "0.0f"; |
| 3730 | // The BRK opcode jumps to the address previously set by the PBK opcode | 1260 | }; |
| 3731 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | ||
| 3732 | UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, | ||
| 3733 | "BRK condition code used: {}", static_cast<u32>(cc)); | ||
| 3734 | 1261 | ||
| 3735 | EmitPopFromFlowStack(); | 1262 | UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); |
| 3736 | break; | ||
| 3737 | } | ||
| 3738 | case OpCode::Id::DEPBAR: { | ||
| 3739 | // TODO(Subv): Find out if we actually have to care about this instruction or if | ||
| 3740 | // the GLSL compiler takes care of that for us. | ||
| 3741 | LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed"); | ||
| 3742 | break; | ||
| 3743 | } | ||
| 3744 | case OpCode::Id::VMAD: { | ||
| 3745 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 3746 | "Condition codes generation in VMAD is not implemented"); | ||
| 3747 | |||
| 3748 | const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1; | ||
| 3749 | const std::string op_a = GetVideoOperandA(instr); | ||
| 3750 | const std::string op_b = GetVideoOperandB(instr); | ||
| 3751 | const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39, 0, result_signed); | ||
| 3752 | |||
| 3753 | std::string result = '(' + op_a + " * " + op_b + " + " + op_c + ')'; | ||
| 3754 | |||
| 3755 | switch (instr.vmad.shr) { | ||
| 3756 | case Tegra::Shader::VmadShr::Shr7: | ||
| 3757 | result = '(' + result + " >> 7)"; | ||
| 3758 | break; | ||
| 3759 | case Tegra::Shader::VmadShr::Shr15: | ||
| 3760 | result = '(' + result + " >> 15)"; | ||
| 3761 | break; | ||
| 3762 | } | ||
| 3763 | 1263 | ||
| 3764 | regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1, | 1264 | code.AddLine("if (alpha_test[0] != 0) {"); |
| 3765 | instr.vmad.saturate, instr.vmad.cc); | 1265 | ++code.scope; |
| 3766 | break; | 1266 | // We start on the register containing the alpha value in the first RT. |
| 1267 | u32 current_reg = 3; | ||
| 1268 | for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) { | ||
| 1269 | // TODO(Blinkhawk): verify the behavior of alpha testing on hardware when | ||
| 1270 | // multiple render targets are used. | ||
| 1271 | if (header.ps.IsColorComponentOutputEnabled(render_target, 0) || | ||
| 1272 | header.ps.IsColorComponentOutputEnabled(render_target, 1) || | ||
| 1273 | header.ps.IsColorComponentOutputEnabled(render_target, 2) || | ||
| 1274 | header.ps.IsColorComponentOutputEnabled(render_target, 3)) { | ||
| 1275 | code.AddLine( | ||
| 1276 | fmt::format("if (!AlphaFunc({})) discard;", SafeGetRegister(current_reg))); | ||
| 1277 | current_reg += 4; | ||
| 3767 | } | 1278 | } |
| 3768 | case OpCode::Id::VSETP: { | 1279 | } |
| 3769 | const std::string op_a = GetVideoOperandA(instr); | 1280 | --code.scope; |
| 3770 | const std::string op_b = GetVideoOperandB(instr); | 1281 | code.AddLine('}'); |
| 3771 | |||
| 3772 | // We can't use the constant predicate as destination. | ||
| 3773 | ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 3774 | |||
| 3775 | const std::string second_pred = GetPredicateCondition(instr.vsetp.pred39, false); | ||
| 3776 | |||
| 3777 | const std::string combiner = GetPredicateCombiner(instr.vsetp.op); | ||
| 3778 | |||
| 3779 | const std::string predicate = GetPredicateComparison(instr.vsetp.cond, op_a, op_b); | ||
| 3780 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 3781 | SetPredicate(instr.vsetp.pred3, | ||
| 3782 | '(' + predicate + ") " + combiner + " (" + second_pred + ')'); | ||
| 3783 | 1282 | ||
| 3784 | if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | 1283 | // Write the color outputs using the data in the shader registers, disabled |
| 3785 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | 1284 | // rendertargets/components are skipped in the register assignment. |
| 3786 | // if enabled | 1285 | current_reg = 0; |
| 3787 | SetPredicate(instr.vsetp.pred0, | 1286 | for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) { |
| 3788 | "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); | 1287 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. |
| 1288 | for (u32 component = 0; component < 4; ++component) { | ||
| 1289 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { | ||
| 1290 | code.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component, | ||
| 1291 | SafeGetRegister(current_reg))); | ||
| 1292 | ++current_reg; | ||
| 3789 | } | 1293 | } |
| 3790 | break; | ||
| 3791 | } | ||
| 3792 | default: { | ||
| 3793 | UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); | ||
| 3794 | break; | ||
| 3795 | } | ||
| 3796 | } | 1294 | } |
| 3797 | |||
| 3798 | break; | ||
| 3799 | } | ||
| 3800 | } | 1295 | } |
| 3801 | 1296 | ||
| 3802 | // Close the predicate condition scope. | 1297 | if (header.ps.omap.depth) { |
| 3803 | if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { | 1298 | // The depth output is always 2 registers after the last color output, and current_reg |
| 3804 | --shader.scope; | 1299 | // already contains one past the last color register. |
| 3805 | shader.AddLine('}'); | 1300 | code.AddLine("gl_FragDepth = " + SafeGetRegister(current_reg + 1) + ';'); |
| 3806 | } | 1301 | } |
| 3807 | 1302 | ||
| 3808 | return offset + 1; | 1303 | code.AddLine("return;"); |
| 1304 | return {}; | ||
| 3809 | } | 1305 | } |
| 3810 | 1306 | ||
| 3811 | /** | 1307 | std::string Discard(Operation operation) { |
| 3812 | * Compiles a range of instructions from Tegra to GLSL. | 1308 | // Enclose "discard" in a conditional, so that GLSL compilation does not complain |
| 3813 | * @param begin the offset of the starting instruction. | 1309 | // about unexecuted instructions that may follow this. |
| 3814 | * @param end the offset where the compilation should stop (exclusive). | 1310 | code.AddLine("if (true) {"); |
| 3815 | * @return the offset of the next instruction to compile. PROGRAM_END if the program | 1311 | ++code.scope; |
| 3816 | * terminates. | 1312 | code.AddLine("discard;"); |
| 3817 | */ | 1313 | --code.scope; |
| 3818 | u32 CompileRange(u32 begin, u32 end) { | 1314 | code.AddLine("}"); |
| 3819 | u32 program_counter; | 1315 | return {}; |
| 3820 | for (program_counter = begin; program_counter < (begin > end ? PROGRAM_END : end);) { | ||
| 3821 | program_counter = CompileInstr(program_counter); | ||
| 3822 | } | ||
| 3823 | return program_counter; | ||
| 3824 | } | 1316 | } |
| 3825 | 1317 | ||
| 3826 | void Generate(const std::string& suffix) { | 1318 | std::string EmitVertex(Operation operation) { |
| 3827 | // Add declarations for all subroutines | 1319 | ASSERT_MSG(stage == ShaderStage::Geometry, |
| 3828 | for (const auto& subroutine : subroutines) { | 1320 | "EmitVertex is expected to be used in a geometry shader."); |
| 3829 | shader.AddLine("bool " + subroutine.GetName() + "();"); | ||
| 3830 | } | ||
| 3831 | shader.AddNewLine(); | ||
| 3832 | |||
| 3833 | // Add the main entry point | ||
| 3834 | shader.AddLine("bool exec_" + suffix + "() {"); | ||
| 3835 | ++shader.scope; | ||
| 3836 | CallSubroutine(GetSubroutine(main_offset, PROGRAM_END)); | ||
| 3837 | --shader.scope; | ||
| 3838 | shader.AddLine("}\n"); | ||
| 3839 | |||
| 3840 | // Add definitions for all subroutines | ||
| 3841 | for (const auto& subroutine : subroutines) { | ||
| 3842 | std::set<u32> labels = subroutine.labels; | ||
| 3843 | |||
| 3844 | shader.AddLine("bool " + subroutine.GetName() + "() {"); | ||
| 3845 | ++shader.scope; | ||
| 3846 | |||
| 3847 | if (labels.empty()) { | ||
| 3848 | if (CompileRange(subroutine.begin, subroutine.end) != PROGRAM_END) { | ||
| 3849 | shader.AddLine("return false;"); | ||
| 3850 | } | ||
| 3851 | } else { | ||
| 3852 | labels.insert(subroutine.begin); | ||
| 3853 | shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;"); | ||
| 3854 | |||
| 3855 | // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems | ||
| 3856 | // unlikely that shaders will use 20 nested SSYs and PBKs. | ||
| 3857 | constexpr u32 FLOW_STACK_SIZE = 20; | ||
| 3858 | shader.AddLine("uint flow_stack[" + std::to_string(FLOW_STACK_SIZE) + "];"); | ||
| 3859 | shader.AddLine("uint flow_stack_top = 0u;"); | ||
| 3860 | 1321 | ||
| 3861 | shader.AddLine("while (true) {"); | 1322 | // If a geometry shader is attached, it will always flip (it's the last stage before |
| 3862 | ++shader.scope; | 1323 | // fragment). For more info about flipping, refer to gl_shader_gen.cpp. |
| 1324 | code.AddLine("position.xy *= viewport_flip.xy;"); | ||
| 1325 | code.AddLine("gl_Position = position;"); | ||
| 1326 | code.AddLine("position.w = 1.0;"); | ||
| 1327 | code.AddLine("EmitVertex();"); | ||
| 1328 | return {}; | ||
| 1329 | } | ||
| 1330 | |||
| 1331 | std::string EndPrimitive(Operation operation) { | ||
| 1332 | ASSERT_MSG(stage == ShaderStage::Geometry, | ||
| 1333 | "EndPrimitive is expected to be used in a geometry shader."); | ||
| 1334 | |||
| 1335 | code.AddLine("EndPrimitive();"); | ||
| 1336 | return {}; | ||
| 1337 | } | ||
| 1338 | |||
| 1339 | std::string YNegate(Operation operation) { | ||
| 1340 | // Config pack's third value is Y_NEGATE's state. | ||
| 1341 | return "uintBitsToFloat(config_pack[2])"; | ||
| 1342 | } | ||
| 1343 | |||
| 1344 | static constexpr OperationDecompilersArray operation_decompilers = { | ||
| 1345 | &GLSLDecompiler::Assign, | ||
| 1346 | |||
| 1347 | &GLSLDecompiler::Select, | ||
| 1348 | |||
| 1349 | &GLSLDecompiler::Add<Type::Float>, | ||
| 1350 | &GLSLDecompiler::Mul<Type::Float>, | ||
| 1351 | &GLSLDecompiler::Div<Type::Float>, | ||
| 1352 | &GLSLDecompiler::Fma<Type::Float>, | ||
| 1353 | &GLSLDecompiler::Negate<Type::Float>, | ||
| 1354 | &GLSLDecompiler::Absolute<Type::Float>, | ||
| 1355 | &GLSLDecompiler::FClamp, | ||
| 1356 | &GLSLDecompiler::Min<Type::Float>, | ||
| 1357 | &GLSLDecompiler::Max<Type::Float>, | ||
| 1358 | &GLSLDecompiler::FCos, | ||
| 1359 | &GLSLDecompiler::FSin, | ||
| 1360 | &GLSLDecompiler::FExp2, | ||
| 1361 | &GLSLDecompiler::FLog2, | ||
| 1362 | &GLSLDecompiler::FInverseSqrt, | ||
| 1363 | &GLSLDecompiler::FSqrt, | ||
| 1364 | &GLSLDecompiler::FRoundEven, | ||
| 1365 | &GLSLDecompiler::FFloor, | ||
| 1366 | &GLSLDecompiler::FCeil, | ||
| 1367 | &GLSLDecompiler::FTrunc, | ||
| 1368 | &GLSLDecompiler::FCastInteger<Type::Int>, | ||
| 1369 | &GLSLDecompiler::FCastInteger<Type::Uint>, | ||
| 1370 | |||
| 1371 | &GLSLDecompiler::Add<Type::Int>, | ||
| 1372 | &GLSLDecompiler::Mul<Type::Int>, | ||
| 1373 | &GLSLDecompiler::Div<Type::Int>, | ||
| 1374 | &GLSLDecompiler::Negate<Type::Int>, | ||
| 1375 | &GLSLDecompiler::Absolute<Type::Int>, | ||
| 1376 | &GLSLDecompiler::Min<Type::Int>, | ||
| 1377 | &GLSLDecompiler::Max<Type::Int>, | ||
| 1378 | |||
| 1379 | &GLSLDecompiler::ICastFloat, | ||
| 1380 | &GLSLDecompiler::ICastUnsigned, | ||
| 1381 | &GLSLDecompiler::LogicalShiftLeft<Type::Int>, | ||
| 1382 | &GLSLDecompiler::ILogicalShiftRight, | ||
| 1383 | &GLSLDecompiler::IArithmeticShiftRight, | ||
| 1384 | &GLSLDecompiler::BitwiseAnd<Type::Int>, | ||
| 1385 | &GLSLDecompiler::BitwiseOr<Type::Int>, | ||
| 1386 | &GLSLDecompiler::BitwiseXor<Type::Int>, | ||
| 1387 | &GLSLDecompiler::BitwiseNot<Type::Int>, | ||
| 1388 | &GLSLDecompiler::BitfieldInsert<Type::Int>, | ||
| 1389 | &GLSLDecompiler::BitfieldExtract<Type::Int>, | ||
| 1390 | &GLSLDecompiler::BitCount<Type::Int>, | ||
| 1391 | |||
| 1392 | &GLSLDecompiler::Add<Type::Uint>, | ||
| 1393 | &GLSLDecompiler::Mul<Type::Uint>, | ||
| 1394 | &GLSLDecompiler::Div<Type::Uint>, | ||
| 1395 | &GLSLDecompiler::Min<Type::Uint>, | ||
| 1396 | &GLSLDecompiler::Max<Type::Uint>, | ||
| 1397 | &GLSLDecompiler::UCastFloat, | ||
| 1398 | &GLSLDecompiler::UCastSigned, | ||
| 1399 | &GLSLDecompiler::LogicalShiftLeft<Type::Uint>, | ||
| 1400 | &GLSLDecompiler::UShiftRight, | ||
| 1401 | &GLSLDecompiler::UShiftRight, | ||
| 1402 | &GLSLDecompiler::BitwiseAnd<Type::Uint>, | ||
| 1403 | &GLSLDecompiler::BitwiseOr<Type::Uint>, | ||
| 1404 | &GLSLDecompiler::BitwiseXor<Type::Uint>, | ||
| 1405 | &GLSLDecompiler::BitwiseNot<Type::Uint>, | ||
| 1406 | &GLSLDecompiler::BitfieldInsert<Type::Uint>, | ||
| 1407 | &GLSLDecompiler::BitfieldExtract<Type::Uint>, | ||
| 1408 | &GLSLDecompiler::BitCount<Type::Uint>, | ||
| 1409 | |||
| 1410 | &GLSLDecompiler::Add<Type::HalfFloat>, | ||
| 1411 | &GLSLDecompiler::Mul<Type::HalfFloat>, | ||
| 1412 | &GLSLDecompiler::Fma<Type::HalfFloat>, | ||
| 1413 | &GLSLDecompiler::Absolute<Type::HalfFloat>, | ||
| 1414 | &GLSLDecompiler::HNegate, | ||
| 1415 | &GLSLDecompiler::HMergeF32, | ||
| 1416 | &GLSLDecompiler::HMergeH0, | ||
| 1417 | &GLSLDecompiler::HMergeH1, | ||
| 1418 | &GLSLDecompiler::HPack2, | ||
| 1419 | |||
| 1420 | &GLSLDecompiler::LogicalAssign, | ||
| 1421 | &GLSLDecompiler::LogicalAnd, | ||
| 1422 | &GLSLDecompiler::LogicalOr, | ||
| 1423 | &GLSLDecompiler::LogicalXor, | ||
| 1424 | &GLSLDecompiler::LogicalNegate, | ||
| 1425 | &GLSLDecompiler::LogicalPick2, | ||
| 1426 | &GLSLDecompiler::LogicalAll2, | ||
| 1427 | &GLSLDecompiler::LogicalAny2, | ||
| 1428 | |||
| 1429 | &GLSLDecompiler::LogicalLessThan<Type::Float>, | ||
| 1430 | &GLSLDecompiler::LogicalEqual<Type::Float>, | ||
| 1431 | &GLSLDecompiler::LogicalLessEqual<Type::Float>, | ||
| 1432 | &GLSLDecompiler::LogicalGreaterThan<Type::Float>, | ||
| 1433 | &GLSLDecompiler::LogicalNotEqual<Type::Float>, | ||
| 1434 | &GLSLDecompiler::LogicalGreaterEqual<Type::Float>, | ||
| 1435 | &GLSLDecompiler::LogicalFIsNan, | ||
| 1436 | |||
| 1437 | &GLSLDecompiler::LogicalLessThan<Type::Int>, | ||
| 1438 | &GLSLDecompiler::LogicalEqual<Type::Int>, | ||
| 1439 | &GLSLDecompiler::LogicalLessEqual<Type::Int>, | ||
| 1440 | &GLSLDecompiler::LogicalGreaterThan<Type::Int>, | ||
| 1441 | &GLSLDecompiler::LogicalNotEqual<Type::Int>, | ||
| 1442 | &GLSLDecompiler::LogicalGreaterEqual<Type::Int>, | ||
| 1443 | |||
| 1444 | &GLSLDecompiler::LogicalLessThan<Type::Uint>, | ||
| 1445 | &GLSLDecompiler::LogicalEqual<Type::Uint>, | ||
| 1446 | &GLSLDecompiler::LogicalLessEqual<Type::Uint>, | ||
| 1447 | &GLSLDecompiler::LogicalGreaterThan<Type::Uint>, | ||
| 1448 | &GLSLDecompiler::LogicalNotEqual<Type::Uint>, | ||
| 1449 | &GLSLDecompiler::LogicalGreaterEqual<Type::Uint>, | ||
| 1450 | |||
| 1451 | &GLSLDecompiler::Logical2HLessThan, | ||
| 1452 | &GLSLDecompiler::Logical2HEqual, | ||
| 1453 | &GLSLDecompiler::Logical2HLessEqual, | ||
| 1454 | &GLSLDecompiler::Logical2HGreaterThan, | ||
| 1455 | &GLSLDecompiler::Logical2HNotEqual, | ||
| 1456 | &GLSLDecompiler::Logical2HGreaterEqual, | ||
| 1457 | |||
| 1458 | &GLSLDecompiler::F4Texture, | ||
| 1459 | &GLSLDecompiler::F4TextureLod, | ||
| 1460 | &GLSLDecompiler::F4TextureGather, | ||
| 1461 | &GLSLDecompiler::F4TextureQueryDimensions, | ||
| 1462 | &GLSLDecompiler::F4TextureQueryLod, | ||
| 1463 | &GLSLDecompiler::F4TexelFetch, | ||
| 1464 | |||
| 1465 | &GLSLDecompiler::Branch, | ||
| 1466 | &GLSLDecompiler::PushFlowStack, | ||
| 1467 | &GLSLDecompiler::PopFlowStack, | ||
| 1468 | &GLSLDecompiler::Exit, | ||
| 1469 | &GLSLDecompiler::Discard, | ||
| 1470 | |||
| 1471 | &GLSLDecompiler::EmitVertex, | ||
| 1472 | &GLSLDecompiler::EndPrimitive, | ||
| 1473 | |||
| 1474 | &GLSLDecompiler::YNegate, | ||
| 1475 | }; | ||
| 3863 | 1476 | ||
| 3864 | shader.AddLine("switch (jmp_to) {"); | 1477 | std::string GetRegister(u32 index) const { |
| 1478 | return GetDeclarationWithSuffix(index, "gpr"); | ||
| 1479 | } | ||
| 3865 | 1480 | ||
| 3866 | for (auto label : labels) { | 1481 | std::string GetPredicate(Tegra::Shader::Pred pred) const { |
| 3867 | shader.AddLine("case " + std::to_string(label) + "u: {"); | 1482 | return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred"); |
| 3868 | ++shader.scope; | 1483 | } |
| 3869 | 1484 | ||
| 3870 | const auto next_it = labels.lower_bound(label + 1); | 1485 | std::string GetInputAttribute(Attribute::Index attribute) const { |
| 3871 | const u32 next_label = next_it == labels.end() ? subroutine.end : *next_it; | 1486 | const auto index{static_cast<u32>(attribute) - |
| 1487 | static_cast<u32>(Attribute::Index::Attribute_0)}; | ||
| 1488 | return GetDeclarationWithSuffix(index, "input_attr"); | ||
| 1489 | } | ||
| 3872 | 1490 | ||
| 3873 | const u32 compile_end = CompileRange(label, next_label); | 1491 | std::string GetOutputAttribute(Attribute::Index attribute) const { |
| 3874 | if (compile_end > next_label && compile_end != PROGRAM_END) { | 1492 | const auto index{static_cast<u32>(attribute) - |
| 3875 | // This happens only when there is a label inside a IF/LOOP block | 1493 | static_cast<u32>(Attribute::Index::Attribute_0)}; |
| 3876 | shader.AddLine(" jmp_to = " + std::to_string(compile_end) + "u; break; }"); | 1494 | return GetDeclarationWithSuffix(index, "output_attr"); |
| 3877 | labels.emplace(compile_end); | 1495 | } |
| 3878 | } | ||
| 3879 | 1496 | ||
| 3880 | --shader.scope; | 1497 | std::string GetConstBuffer(u32 index) const { |
| 3881 | shader.AddLine('}'); | 1498 | return GetDeclarationWithSuffix(index, "cbuf"); |
| 3882 | } | 1499 | } |
| 3883 | 1500 | ||
| 3884 | shader.AddLine("default: return false;"); | 1501 | std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { |
| 3885 | shader.AddLine('}'); | 1502 | return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix); |
| 1503 | } | ||
| 3886 | 1504 | ||
| 3887 | --shader.scope; | 1505 | std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const { |
| 3888 | shader.AddLine('}'); | 1506 | return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, |
| 1507 | suffix); | ||
| 1508 | } | ||
| 3889 | 1509 | ||
| 3890 | shader.AddLine("return false;"); | 1510 | std::string GetConstBufferBlock(u32 index) const { |
| 3891 | } | 1511 | return GetDeclarationWithSuffix(index, "cbuf_block"); |
| 1512 | } | ||
| 3892 | 1513 | ||
| 3893 | --shader.scope; | 1514 | std::string GetLocalMemory() const { |
| 3894 | shader.AddLine("}\n"); | 1515 | return "lmem_" + suffix; |
| 1516 | } | ||
| 3895 | 1517 | ||
| 3896 | DEBUG_ASSERT(shader.scope == 0); | 1518 | std::string GetInternalFlag(InternalFlag flag) const { |
| 3897 | } | 1519 | constexpr std::array<const char*, 4> InternalFlagNames = {"zero_flag", "sign_flag", |
| 1520 | "carry_flag", "overflow_flag"}; | ||
| 1521 | const auto index = static_cast<u32>(flag); | ||
| 1522 | ASSERT(index < static_cast<u32>(InternalFlag::Amount)); | ||
| 3898 | 1523 | ||
| 3899 | GenerateDeclarations(); | 1524 | return std::string(InternalFlagNames[index]) + '_' + suffix; |
| 3900 | } | 1525 | } |
| 3901 | 1526 | ||
| 3902 | /// Add declarations for registers | 1527 | std::string GetSampler(const Sampler& sampler) const { |
| 3903 | void GenerateDeclarations() { | 1528 | return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); |
| 3904 | regs.GenerateDeclarations(suffix); | 1529 | } |
| 3905 | 1530 | ||
| 3906 | for (const auto& pred : declr_predicates) { | 1531 | std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const { |
| 3907 | declarations.AddLine("bool " + pred + " = false;"); | 1532 | return name + '_' + std::to_string(index) + '_' + suffix; |
| 3908 | } | ||
| 3909 | declarations.AddNewLine(); | ||
| 3910 | } | 1533 | } |
| 3911 | 1534 | ||
| 3912 | private: | 1535 | const ShaderIR& ir; |
| 3913 | const std::set<Subroutine>& subroutines; | 1536 | const ShaderStage stage; |
| 3914 | const ProgramCode& program_code; | 1537 | const std::string suffix; |
| 3915 | Tegra::Shader::Header header; | 1538 | const Header header; |
| 3916 | const u32 main_offset; | 1539 | |
| 3917 | Maxwell3D::Regs::ShaderStage stage; | 1540 | ShaderWriter code; |
| 3918 | const std::string& suffix; | 1541 | }; |
| 3919 | u64 local_memory_size; | ||
| 3920 | std::size_t shader_length; | ||
| 3921 | |||
| 3922 | ShaderWriter shader; | ||
| 3923 | ShaderWriter declarations; | ||
| 3924 | GLSLRegisterManager regs{shader, declarations, stage, suffix, header}; | ||
| 3925 | |||
| 3926 | // Declarations | ||
| 3927 | std::set<std::string> declr_predicates; | ||
| 3928 | }; // namespace OpenGL::GLShader::Decompiler | ||
| 3929 | 1542 | ||
| 3930 | std::string GetCommonDeclarations() { | 1543 | std::string GetCommonDeclarations() { |
| 3931 | return fmt::format("#define MAX_CONSTBUFFER_ELEMENTS {}\n", | 1544 | const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); |
| 3932 | RasterizerOpenGL::MaxConstbufferSize / sizeof(GLvec4)); | 1545 | const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS); |
| 1546 | return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" + | ||
| 1547 | "#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" + | ||
| 1548 | "#define ftoi floatBitsToInt\n" | ||
| 1549 | "#define ftou floatBitsToUint\n" | ||
| 1550 | "#define itof intBitsToFloat\n" | ||
| 1551 | "#define utof uintBitsToFloat\n\n" | ||
| 1552 | "float fromHalf2(vec2 pair) {\n" | ||
| 1553 | " return utof(packHalf2x16(pair));\n" | ||
| 1554 | "}\n\n" | ||
| 1555 | "vec2 toHalf2(float value) {\n" | ||
| 1556 | " return unpackHalf2x16(ftou(value));\n" | ||
| 1557 | "}\n"; | ||
| 3933 | } | 1558 | } |
| 3934 | 1559 | ||
| 3935 | std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, | 1560 | ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const std::string& suffix) { |
| 3936 | Maxwell3D::Regs::ShaderStage stage, | 1561 | GLSLDecompiler decompiler(ir, stage, suffix); |
| 3937 | const std::string& suffix) { | 1562 | decompiler.Decompile(); |
| 3938 | try { | 1563 | return {decompiler.GetResult(), decompiler.GetShaderEntries()}; |
| 3939 | ControlFlowAnalyzer analyzer(program_code, main_offset, suffix); | ||
| 3940 | const auto subroutines = analyzer.GetSubroutines(); | ||
| 3941 | GLSLGenerator generator(subroutines, program_code, main_offset, stage, suffix, | ||
| 3942 | analyzer.GetShaderLength()); | ||
| 3943 | return ProgramResult{generator.GetShaderCode(), generator.GetEntries()}; | ||
| 3944 | } catch (const DecompileFail& exception) { | ||
| 3945 | LOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what()); | ||
| 3946 | } | ||
| 3947 | return {}; | ||
| 3948 | } | 1564 | } |
| 3949 | 1565 | ||
| 3950 | } // namespace OpenGL::GLShader::Decompiler | 1566 | } // namespace OpenGL::GLShader \ No newline at end of file |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index d01a4a7ee..0856a1361 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -5,21 +5,106 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <functional> | ||
| 9 | #include <optional> | ||
| 10 | #include <string> | 8 | #include <string> |
| 9 | #include <utility> | ||
| 10 | #include <vector> | ||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 13 | #include "video_core/shader/shader_ir.h" |
| 14 | 14 | ||
| 15 | namespace OpenGL::GLShader::Decompiler { | 15 | namespace VideoCommon::Shader { |
| 16 | class ShaderIR; | ||
| 17 | } | ||
| 16 | 18 | ||
| 17 | using Tegra::Engines::Maxwell3D; | 19 | namespace OpenGL::GLShader { |
| 20 | |||
| 21 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 22 | |||
| 23 | class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { | ||
| 24 | public: | ||
| 25 | explicit ConstBufferEntry(const VideoCommon::Shader::ConstBuffer& entry, | ||
| 26 | Maxwell::ShaderStage stage, const std::string& name, u32 index) | ||
| 27 | : VideoCommon::Shader::ConstBuffer{entry}, stage{stage}, name{name}, index{index} {} | ||
| 28 | |||
| 29 | const std::string& GetName() const { | ||
| 30 | return name; | ||
| 31 | } | ||
| 32 | |||
| 33 | Maxwell::ShaderStage GetStage() const { | ||
| 34 | return stage; | ||
| 35 | } | ||
| 36 | |||
| 37 | u32 GetIndex() const { | ||
| 38 | return index; | ||
| 39 | } | ||
| 40 | |||
| 41 | private: | ||
| 42 | std::string name; | ||
| 43 | Maxwell::ShaderStage stage{}; | ||
| 44 | u32 index{}; | ||
| 45 | }; | ||
| 46 | |||
| 47 | class SamplerEntry : public VideoCommon::Shader::Sampler { | ||
| 48 | public: | ||
| 49 | explicit SamplerEntry(const VideoCommon::Shader::Sampler& entry, Maxwell::ShaderStage stage, | ||
| 50 | const std::string& name) | ||
| 51 | : VideoCommon::Shader::Sampler{entry}, stage{stage}, name{name} {} | ||
| 52 | |||
| 53 | const std::string& GetName() const { | ||
| 54 | return name; | ||
| 55 | } | ||
| 56 | |||
| 57 | Maxwell::ShaderStage GetStage() const { | ||
| 58 | return stage; | ||
| 59 | } | ||
| 60 | |||
| 61 | private: | ||
| 62 | std::string name; | ||
| 63 | Maxwell::ShaderStage stage{}; | ||
| 64 | }; | ||
| 65 | |||
| 66 | class GlobalMemoryEntry { | ||
| 67 | public: | ||
| 68 | explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, Maxwell::ShaderStage stage, | ||
| 69 | std::string name) | ||
| 70 | : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, stage{stage}, name{std::move(name)} {} | ||
| 71 | |||
| 72 | u32 GetCbufIndex() const { | ||
| 73 | return cbuf_index; | ||
| 74 | } | ||
| 75 | |||
| 76 | u32 GetCbufOffset() const { | ||
| 77 | return cbuf_offset; | ||
| 78 | } | ||
| 79 | |||
| 80 | const std::string& GetName() const { | ||
| 81 | return name; | ||
| 82 | } | ||
| 83 | |||
| 84 | Maxwell::ShaderStage GetStage() const { | ||
| 85 | return stage; | ||
| 86 | } | ||
| 87 | |||
| 88 | private: | ||
| 89 | u32 cbuf_index{}; | ||
| 90 | u32 cbuf_offset{}; | ||
| 91 | Maxwell::ShaderStage stage{}; | ||
| 92 | std::string name; | ||
| 93 | }; | ||
| 94 | |||
| 95 | struct ShaderEntries { | ||
| 96 | std::vector<ConstBufferEntry> const_buffers; | ||
| 97 | std::vector<SamplerEntry> samplers; | ||
| 98 | std::vector<GlobalMemoryEntry> global_memory_entries; | ||
| 99 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | ||
| 100 | std::size_t shader_length{}; | ||
| 101 | }; | ||
| 102 | |||
| 103 | using ProgramResult = std::pair<std::string, ShaderEntries>; | ||
| 18 | 104 | ||
| 19 | std::string GetCommonDeclarations(); | 105 | std::string GetCommonDeclarations(); |
| 20 | 106 | ||
| 21 | std::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, | 107 | ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage, |
| 22 | Maxwell3D::Regs::ShaderStage stage, | 108 | const std::string& suffix); |
| 23 | const std::string& suffix); | ||
| 24 | 109 | ||
| 25 | } // namespace OpenGL::GLShader::Decompiler | 110 | } // namespace OpenGL::GLShader \ No newline at end of file |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 5d0819dc5..04e1db911 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -7,63 +7,57 @@ | |||
| 7 | #include "video_core/engines/maxwell_3d.h" | 7 | #include "video_core/engines/maxwell_3d.h" |
| 8 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 8 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 9 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 9 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 10 | 11 | ||
| 11 | namespace OpenGL::GLShader { | 12 | namespace OpenGL::GLShader { |
| 12 | 13 | ||
| 13 | using Tegra::Engines::Maxwell3D; | 14 | using Tegra::Engines::Maxwell3D; |
| 15 | using VideoCommon::Shader::ProgramCode; | ||
| 16 | using VideoCommon::Shader::ShaderIR; | ||
| 14 | 17 | ||
| 15 | static constexpr u32 PROGRAM_OFFSET{10}; | 18 | static constexpr u32 PROGRAM_OFFSET{10}; |
| 16 | 19 | ||
| 17 | ProgramResult GenerateVertexShader(const ShaderSetup& setup) { | 20 | ProgramResult GenerateVertexShader(const ShaderSetup& setup) { |
| 18 | std::string out = "#version 430 core\n"; | ||
| 19 | out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; | ||
| 20 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 21 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |
| 22 | |||
| 23 | std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; | ||
| 21 | out += "// Shader Unique Id: VS" + id + "\n\n"; | 24 | out += "// Shader Unique Id: VS" + id + "\n\n"; |
| 22 | out += Decompiler::GetCommonDeclarations(); | 25 | out += GetCommonDeclarations(); |
| 23 | 26 | ||
| 24 | out += R"( | 27 | out += R"( |
| 25 | |||
| 26 | layout (location = 0) out vec4 position; | 28 | layout (location = 0) out vec4 position; |
| 27 | 29 | ||
| 28 | layout(std140) uniform vs_config { | 30 | layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { |
| 29 | vec4 viewport_flip; | 31 | vec4 viewport_flip; |
| 30 | uvec4 config_pack; // instance_id, flip_stage, y_direction, padding | 32 | uvec4 config_pack; // instance_id, flip_stage, y_direction, padding |
| 31 | uvec4 alpha_test; | 33 | uvec4 alpha_test; |
| 32 | }; | 34 | }; |
| 33 | )"; | ||
| 34 | |||
| 35 | if (setup.IsDualProgram()) { | ||
| 36 | out += "bool exec_vertex_b();\n"; | ||
| 37 | } | ||
| 38 | 35 | ||
| 39 | ProgramResult program = | 36 | )"; |
| 40 | Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET, | 37 | ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); |
| 41 | Maxwell3D::Regs::ShaderStage::Vertex, "vertex") | 38 | ProgramResult program = Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex"); |
| 42 | .value_or(ProgramResult()); | ||
| 43 | 39 | ||
| 44 | out += program.first; | 40 | out += program.first; |
| 45 | 41 | ||
| 46 | if (setup.IsDualProgram()) { | 42 | if (setup.IsDualProgram()) { |
| 43 | ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET); | ||
| 47 | ProgramResult program_b = | 44 | ProgramResult program_b = |
| 48 | Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET, | 45 | Decompile(program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b"); |
| 49 | Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b") | 46 | |
| 50 | .value_or(ProgramResult()); | ||
| 51 | out += program_b.first; | 47 | out += program_b.first; |
| 52 | } | 48 | } |
| 53 | 49 | ||
| 54 | out += R"( | 50 | out += R"( |
| 55 | |||
| 56 | void main() { | 51 | void main() { |
| 57 | position = vec4(0.0, 0.0, 0.0, 0.0); | 52 | position = vec4(0.0, 0.0, 0.0, 0.0); |
| 58 | exec_vertex(); | 53 | execute_vertex(); |
| 59 | )"; | 54 | )"; |
| 60 | 55 | ||
| 61 | if (setup.IsDualProgram()) { | 56 | if (setup.IsDualProgram()) { |
| 62 | out += " exec_vertex_b();"; | 57 | out += " execute_vertex_b();"; |
| 63 | } | 58 | } |
| 64 | 59 | ||
| 65 | out += R"( | 60 | out += R"( |
| 66 | |||
| 67 | // Check if the flip stage is VertexB | 61 | // Check if the flip stage is VertexB |
| 68 | // Config pack's second value is flip_stage | 62 | // Config pack's second value is flip_stage |
| 69 | if (config_pack[1] == 1) { | 63 | if (config_pack[1] == 1) { |
| @@ -77,73 +71,62 @@ void main() { | |||
| 77 | if (config_pack[1] == 1) { | 71 | if (config_pack[1] == 1) { |
| 78 | position.w = 1.0; | 72 | position.w = 1.0; |
| 79 | } | 73 | } |
| 80 | } | 74 | })"; |
| 81 | |||
| 82 | )"; | ||
| 83 | 75 | ||
| 84 | return {out, program.second}; | 76 | return {out, program.second}; |
| 85 | } | 77 | } |
| 86 | 78 | ||
| 87 | ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { | 79 | ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { |
| 88 | // Version is intentionally skipped in shader generation, it's added by the lazy compilation. | ||
| 89 | std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; | ||
| 90 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 80 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |
| 81 | |||
| 82 | std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; | ||
| 91 | out += "// Shader Unique Id: GS" + id + "\n\n"; | 83 | out += "// Shader Unique Id: GS" + id + "\n\n"; |
| 92 | out += Decompiler::GetCommonDeclarations(); | 84 | out += GetCommonDeclarations(); |
| 93 | out += "bool exec_geometry();\n"; | ||
| 94 | 85 | ||
| 95 | ProgramResult program = | ||
| 96 | Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET, | ||
| 97 | Maxwell3D::Regs::ShaderStage::Geometry, "geometry") | ||
| 98 | .value_or(ProgramResult()); | ||
| 99 | out += R"( | 86 | out += R"( |
| 100 | out gl_PerVertex { | ||
| 101 | vec4 gl_Position; | ||
| 102 | }; | ||
| 103 | |||
| 104 | layout (location = 0) in vec4 gs_position[]; | 87 | layout (location = 0) in vec4 gs_position[]; |
| 105 | layout (location = 0) out vec4 position; | 88 | layout (location = 0) out vec4 position; |
| 106 | 89 | ||
| 107 | layout (std140) uniform gs_config { | 90 | layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { |
| 108 | vec4 viewport_flip; | 91 | vec4 viewport_flip; |
| 109 | uvec4 config_pack; // instance_id, flip_stage, y_direction, padding | 92 | uvec4 config_pack; // instance_id, flip_stage, y_direction, padding |
| 110 | uvec4 alpha_test; | 93 | uvec4 alpha_test; |
| 111 | }; | 94 | }; |
| 112 | 95 | ||
| 113 | void main() { | ||
| 114 | exec_geometry(); | ||
| 115 | } | ||
| 116 | |||
| 117 | )"; | 96 | )"; |
| 97 | ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | ||
| 98 | ProgramResult program = | ||
| 99 | Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); | ||
| 118 | out += program.first; | 100 | out += program.first; |
| 101 | |||
| 102 | out += R"( | ||
| 103 | void main() { | ||
| 104 | execute_geometry(); | ||
| 105 | };)"; | ||
| 106 | |||
| 119 | return {out, program.second}; | 107 | return {out, program.second}; |
| 120 | } | 108 | } |
| 121 | 109 | ||
| 122 | ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { | 110 | ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { |
| 123 | std::string out = "#version 430 core\n"; | ||
| 124 | out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; | ||
| 125 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 111 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |
| 112 | |||
| 113 | std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; | ||
| 126 | out += "// Shader Unique Id: FS" + id + "\n\n"; | 114 | out += "// Shader Unique Id: FS" + id + "\n\n"; |
| 127 | out += Decompiler::GetCommonDeclarations(); | 115 | out += GetCommonDeclarations(); |
| 128 | out += "bool exec_fragment();\n"; | ||
| 129 | 116 | ||
| 130 | ProgramResult program = | ||
| 131 | Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET, | ||
| 132 | Maxwell3D::Regs::ShaderStage::Fragment, "fragment") | ||
| 133 | .value_or(ProgramResult()); | ||
| 134 | out += R"( | 117 | out += R"( |
| 135 | layout(location = 0) out vec4 FragColor0; | 118 | layout (location = 0) out vec4 FragColor0; |
| 136 | layout(location = 1) out vec4 FragColor1; | 119 | layout (location = 1) out vec4 FragColor1; |
| 137 | layout(location = 2) out vec4 FragColor2; | 120 | layout (location = 2) out vec4 FragColor2; |
| 138 | layout(location = 3) out vec4 FragColor3; | 121 | layout (location = 3) out vec4 FragColor3; |
| 139 | layout(location = 4) out vec4 FragColor4; | 122 | layout (location = 4) out vec4 FragColor4; |
| 140 | layout(location = 5) out vec4 FragColor5; | 123 | layout (location = 5) out vec4 FragColor5; |
| 141 | layout(location = 6) out vec4 FragColor6; | 124 | layout (location = 6) out vec4 FragColor6; |
| 142 | layout(location = 7) out vec4 FragColor7; | 125 | layout (location = 7) out vec4 FragColor7; |
| 143 | 126 | ||
| 144 | layout (location = 0) in vec4 position; | 127 | layout (location = 0) in vec4 position; |
| 145 | 128 | ||
| 146 | layout (std140) uniform fs_config { | 129 | layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { |
| 147 | vec4 viewport_flip; | 130 | vec4 viewport_flip; |
| 148 | uvec4 config_pack; // instance_id, flip_stage, y_direction, padding | 131 | uvec4 config_pack; // instance_id, flip_stage, y_direction, padding |
| 149 | uvec4 alpha_test; | 132 | uvec4 alpha_test; |
| @@ -173,12 +156,20 @@ bool AlphaFunc(in float value) { | |||
| 173 | } | 156 | } |
| 174 | } | 157 | } |
| 175 | 158 | ||
| 159 | )"; | ||
| 160 | ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | ||
| 161 | ProgramResult program = | ||
| 162 | Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment"); | ||
| 163 | |||
| 164 | out += program.first; | ||
| 165 | |||
| 166 | out += R"( | ||
| 176 | void main() { | 167 | void main() { |
| 177 | exec_fragment(); | 168 | execute_fragment(); |
| 178 | } | 169 | } |
| 179 | 170 | ||
| 180 | )"; | 171 | )"; |
| 181 | out += program.first; | ||
| 182 | return {out, program.second}; | 172 | return {out, program.second}; |
| 183 | } | 173 | } |
| 184 | } // namespace OpenGL::GLShader | 174 | |
| 175 | } // namespace OpenGL::GLShader \ No newline at end of file | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index fcc20d3b4..ac5e6917b 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h | |||
| @@ -10,164 +10,12 @@ | |||
| 10 | 10 | ||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/engines/shader_bytecode.h" | 12 | #include "video_core/engines/shader_bytecode.h" |
| 13 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 14 | #include "video_core/shader/shader_ir.h" | ||
| 13 | 15 | ||
| 14 | namespace OpenGL::GLShader { | 16 | namespace OpenGL::GLShader { |
| 15 | 17 | ||
| 16 | constexpr std::size_t MAX_PROGRAM_CODE_LENGTH{0x1000}; | 18 | using VideoCommon::Shader::ProgramCode; |
| 17 | using ProgramCode = std::vector<u64>; | ||
| 18 | |||
| 19 | enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; | ||
| 20 | |||
| 21 | class ConstBufferEntry { | ||
| 22 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 23 | |||
| 24 | public: | ||
| 25 | void MarkAsUsed(u64 index, u64 offset, Maxwell::ShaderStage stage) { | ||
| 26 | is_used = true; | ||
| 27 | this->index = static_cast<unsigned>(index); | ||
| 28 | this->stage = stage; | ||
| 29 | max_offset = std::max(max_offset, static_cast<unsigned>(offset)); | ||
| 30 | } | ||
| 31 | |||
| 32 | void MarkAsUsedIndirect(u64 index, Maxwell::ShaderStage stage) { | ||
| 33 | is_used = true; | ||
| 34 | is_indirect = true; | ||
| 35 | this->index = static_cast<unsigned>(index); | ||
| 36 | this->stage = stage; | ||
| 37 | } | ||
| 38 | |||
| 39 | bool IsUsed() const { | ||
| 40 | return is_used; | ||
| 41 | } | ||
| 42 | |||
| 43 | bool IsIndirect() const { | ||
| 44 | return is_indirect; | ||
| 45 | } | ||
| 46 | |||
| 47 | unsigned GetIndex() const { | ||
| 48 | return index; | ||
| 49 | } | ||
| 50 | |||
| 51 | unsigned GetSize() const { | ||
| 52 | return max_offset + 1; | ||
| 53 | } | ||
| 54 | |||
| 55 | std::string GetName() const { | ||
| 56 | return BufferBaseNames[static_cast<std::size_t>(stage)] + std::to_string(index); | ||
| 57 | } | ||
| 58 | |||
| 59 | u32 GetHash() const { | ||
| 60 | return (static_cast<u32>(stage) << 16) | index; | ||
| 61 | } | ||
| 62 | |||
| 63 | private: | ||
| 64 | static constexpr std::array<const char*, Maxwell::MaxShaderStage> BufferBaseNames = { | ||
| 65 | "buffer_vs_c", "buffer_tessc_c", "buffer_tesse_c", "buffer_gs_c", "buffer_fs_c", | ||
| 66 | }; | ||
| 67 | |||
| 68 | bool is_used{}; | ||
| 69 | bool is_indirect{}; | ||
| 70 | unsigned index{}; | ||
| 71 | unsigned max_offset{}; | ||
| 72 | Maxwell::ShaderStage stage; | ||
| 73 | }; | ||
| 74 | |||
| 75 | class SamplerEntry { | ||
| 76 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 77 | |||
| 78 | public: | ||
| 79 | SamplerEntry(Maxwell::ShaderStage stage, std::size_t offset, std::size_t index, | ||
| 80 | Tegra::Shader::TextureType type, bool is_array, bool is_shadow) | ||
| 81 | : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array), | ||
| 82 | is_shadow(is_shadow) {} | ||
| 83 | |||
| 84 | std::size_t GetOffset() const { | ||
| 85 | return offset; | ||
| 86 | } | ||
| 87 | |||
| 88 | std::size_t GetIndex() const { | ||
| 89 | return sampler_index; | ||
| 90 | } | ||
| 91 | |||
| 92 | Maxwell::ShaderStage GetStage() const { | ||
| 93 | return stage; | ||
| 94 | } | ||
| 95 | |||
| 96 | std::string GetName() const { | ||
| 97 | return std::string(TextureSamplerNames[static_cast<std::size_t>(stage)]) + '_' + | ||
| 98 | std::to_string(sampler_index); | ||
| 99 | } | ||
| 100 | |||
| 101 | std::string GetTypeString() const { | ||
| 102 | using Tegra::Shader::TextureType; | ||
| 103 | std::string glsl_type; | ||
| 104 | |||
| 105 | switch (type) { | ||
| 106 | case TextureType::Texture1D: | ||
| 107 | glsl_type = "sampler1D"; | ||
| 108 | break; | ||
| 109 | case TextureType::Texture2D: | ||
| 110 | glsl_type = "sampler2D"; | ||
| 111 | break; | ||
| 112 | case TextureType::Texture3D: | ||
| 113 | glsl_type = "sampler3D"; | ||
| 114 | break; | ||
| 115 | case TextureType::TextureCube: | ||
| 116 | glsl_type = "samplerCube"; | ||
| 117 | break; | ||
| 118 | default: | ||
| 119 | UNIMPLEMENTED(); | ||
| 120 | } | ||
| 121 | if (is_array) | ||
| 122 | glsl_type += "Array"; | ||
| 123 | if (is_shadow) | ||
| 124 | glsl_type += "Shadow"; | ||
| 125 | return glsl_type; | ||
| 126 | } | ||
| 127 | |||
| 128 | Tegra::Shader::TextureType GetType() const { | ||
| 129 | return type; | ||
| 130 | } | ||
| 131 | |||
| 132 | bool IsArray() const { | ||
| 133 | return is_array; | ||
| 134 | } | ||
| 135 | |||
| 136 | bool IsShadow() const { | ||
| 137 | return is_shadow; | ||
| 138 | } | ||
| 139 | |||
| 140 | u32 GetHash() const { | ||
| 141 | return (static_cast<u32>(stage) << 16) | static_cast<u32>(sampler_index); | ||
| 142 | } | ||
| 143 | |||
| 144 | static std::string GetArrayName(Maxwell::ShaderStage stage) { | ||
| 145 | return TextureSamplerNames[static_cast<std::size_t>(stage)]; | ||
| 146 | } | ||
| 147 | |||
| 148 | private: | ||
| 149 | static constexpr std::array<const char*, Maxwell::MaxShaderStage> TextureSamplerNames = { | ||
| 150 | "tex_vs", "tex_tessc", "tex_tesse", "tex_gs", "tex_fs", | ||
| 151 | }; | ||
| 152 | |||
| 153 | /// Offset in TSC memory from which to read the sampler object, as specified by the sampling | ||
| 154 | /// instruction. | ||
| 155 | std::size_t offset; | ||
| 156 | Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used. | ||
| 157 | std::size_t sampler_index; ///< Value used to index into the generated GLSL sampler array. | ||
| 158 | Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc) | ||
| 159 | bool is_array; ///< Whether the texture is being sampled as an array texture or not. | ||
| 160 | bool is_shadow; ///< Whether the texture is being sampled as a depth texture or not. | ||
| 161 | }; | ||
| 162 | |||
| 163 | struct ShaderEntries { | ||
| 164 | std::vector<ConstBufferEntry> const_buffer_entries; | ||
| 165 | std::vector<SamplerEntry> texture_samplers; | ||
| 166 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> clip_distances; | ||
| 167 | std::size_t shader_length; | ||
| 168 | }; | ||
| 169 | |||
| 170 | using ProgramResult = std::pair<std::string, ShaderEntries>; | ||
| 171 | 19 | ||
| 172 | struct ShaderSetup { | 20 | struct ShaderSetup { |
| 173 | explicit ShaderSetup(ProgramCode program_code) { | 21 | explicit ShaderSetup(ProgramCode program_code) { |
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp new file mode 100644 index 000000000..812983a99 --- /dev/null +++ b/src/video_core/shader/decode.cpp | |||
| @@ -0,0 +1,206 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstring> | ||
| 6 | #include <set> | ||
| 7 | |||
| 8 | #include <fmt/format.h> | ||
| 9 | |||
| 10 | #include "common/assert.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/engines/shader_bytecode.h" | ||
| 13 | #include "video_core/engines/shader_header.h" | ||
| 14 | #include "video_core/shader/shader_ir.h" | ||
| 15 | |||
| 16 | namespace VideoCommon::Shader { | ||
| 17 | |||
| 18 | using Tegra::Shader::Instruction; | ||
| 19 | using Tegra::Shader::OpCode; | ||
| 20 | |||
| 21 | namespace { | ||
| 22 | |||
| 23 | /// Merges exit method of two parallel branches. | ||
| 24 | constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) { | ||
| 25 | if (a == ExitMethod::Undetermined) { | ||
| 26 | return b; | ||
| 27 | } | ||
| 28 | if (b == ExitMethod::Undetermined) { | ||
| 29 | return a; | ||
| 30 | } | ||
| 31 | if (a == b) { | ||
| 32 | return a; | ||
| 33 | } | ||
| 34 | return ExitMethod::Conditional; | ||
| 35 | } | ||
| 36 | |||
| 37 | /** | ||
| 38 | * Returns whether the instruction at the specified offset is a 'sched' instruction. | ||
| 39 | * Sched instructions always appear before a sequence of 3 instructions. | ||
| 40 | */ | ||
| 41 | constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | ||
| 42 | constexpr u32 SchedPeriod = 4; | ||
| 43 | u32 absolute_offset = offset - main_offset; | ||
| 44 | |||
| 45 | return (absolute_offset % SchedPeriod) == 0; | ||
| 46 | } | ||
| 47 | |||
| 48 | } // namespace | ||
| 49 | |||
| 50 | void ShaderIR::Decode() { | ||
| 51 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | ||
| 52 | |||
| 53 | std::set<u32> labels; | ||
| 54 | const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels); | ||
| 55 | if (exit_method != ExitMethod::AlwaysEnd) { | ||
| 56 | UNREACHABLE_MSG("Program does not always end"); | ||
| 57 | } | ||
| 58 | |||
| 59 | if (labels.empty()) { | ||
| 60 | basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)}); | ||
| 61 | return; | ||
| 62 | } | ||
| 63 | |||
| 64 | labels.insert(main_offset); | ||
| 65 | |||
| 66 | for (const u32 label : labels) { | ||
| 67 | const auto next_it = labels.lower_bound(label + 1); | ||
| 68 | const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it; | ||
| 69 | |||
| 70 | basic_blocks.insert({label, DecodeRange(label, next_label)}); | ||
| 71 | } | ||
| 72 | } | ||
| 73 | |||
| 74 | ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) { | ||
| 75 | const auto [iter, inserted] = | ||
| 76 | exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); | ||
| 77 | ExitMethod& exit_method = iter->second; | ||
| 78 | if (!inserted) | ||
| 79 | return exit_method; | ||
| 80 | |||
| 81 | for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) { | ||
| 82 | coverage_begin = std::min(coverage_begin, offset); | ||
| 83 | coverage_end = std::max(coverage_end, offset + 1); | ||
| 84 | |||
| 85 | const Instruction instr = {program_code[offset]}; | ||
| 86 | const auto opcode = OpCode::Decode(instr); | ||
| 87 | if (!opcode) | ||
| 88 | continue; | ||
| 89 | switch (opcode->get().GetId()) { | ||
| 90 | case OpCode::Id::EXIT: { | ||
| 91 | // The EXIT instruction can be predicated, which means that the shader can conditionally | ||
| 92 | // end on this instruction. We have to consider the case where the condition is not met | ||
| 93 | // and check the exit method of that other basic block. | ||
| 94 | using Tegra::Shader::Pred; | ||
| 95 | if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { | ||
| 96 | return exit_method = ExitMethod::AlwaysEnd; | ||
| 97 | } else { | ||
| 98 | const ExitMethod not_met = Scan(offset + 1, end, labels); | ||
| 99 | return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met); | ||
| 100 | } | ||
| 101 | } | ||
| 102 | case OpCode::Id::BRA: { | ||
| 103 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 104 | labels.insert(target); | ||
| 105 | const ExitMethod no_jmp = Scan(offset + 1, end, labels); | ||
| 106 | const ExitMethod jmp = Scan(target, end, labels); | ||
| 107 | return exit_method = ParallelExit(no_jmp, jmp); | ||
| 108 | } | ||
| 109 | case OpCode::Id::SSY: | ||
| 110 | case OpCode::Id::PBK: { | ||
| 111 | // The SSY and PBK use a similar encoding as the BRA instruction. | ||
| 112 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 113 | "Constant buffer branching is not supported"); | ||
| 114 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 115 | labels.insert(target); | ||
| 116 | // Continue scanning for an exit method. | ||
| 117 | break; | ||
| 118 | } | ||
| 119 | } | ||
| 120 | } | ||
| 121 | return exit_method = ExitMethod::AlwaysReturn; | ||
| 122 | } | ||
| 123 | |||
| 124 | BasicBlock ShaderIR::DecodeRange(u32 begin, u32 end) { | ||
| 125 | BasicBlock basic_block; | ||
| 126 | for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { | ||
| 127 | pc = DecodeInstr(basic_block, pc); | ||
| 128 | } | ||
| 129 | return basic_block; | ||
| 130 | } | ||
| 131 | |||
| 132 | u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) { | ||
| 133 | // Ignore sched instructions when generating code. | ||
| 134 | if (IsSchedInstruction(pc, main_offset)) { | ||
| 135 | return pc + 1; | ||
| 136 | } | ||
| 137 | |||
| 138 | const Instruction instr = {program_code[pc]}; | ||
| 139 | const auto opcode = OpCode::Decode(instr); | ||
| 140 | |||
| 141 | // Decoding failure | ||
| 142 | if (!opcode) { | ||
| 143 | UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); | ||
| 144 | return pc + 1; | ||
| 145 | } | ||
| 146 | |||
| 147 | bb.push_back( | ||
| 148 | Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value))); | ||
| 149 | |||
| 150 | using Tegra::Shader::Pred; | ||
| 151 | UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, | ||
| 152 | "NeverExecute predicate not implemented"); | ||
| 153 | |||
| 154 | static const std::map<OpCode::Type, u32 (ShaderIR::*)(BasicBlock&, const BasicBlock&, u32)> | ||
| 155 | decoders = { | ||
| 156 | {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic}, | ||
| 157 | {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate}, | ||
| 158 | {OpCode::Type::Bfe, &ShaderIR::DecodeBfe}, | ||
| 159 | {OpCode::Type::Bfi, &ShaderIR::DecodeBfi}, | ||
| 160 | {OpCode::Type::Shift, &ShaderIR::DecodeShift}, | ||
| 161 | {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger}, | ||
| 162 | {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate}, | ||
| 163 | {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf}, | ||
| 164 | {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate}, | ||
| 165 | {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, | ||
| 166 | {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, | ||
| 167 | {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, | ||
| 168 | {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, | ||
| 169 | {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, | ||
| 170 | {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, | ||
| 171 | {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, | ||
| 172 | {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister}, | ||
| 173 | {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate}, | ||
| 174 | {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate}, | ||
| 175 | {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet}, | ||
| 176 | {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet}, | ||
| 177 | {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet}, | ||
| 178 | {OpCode::Type::Video, &ShaderIR::DecodeVideo}, | ||
| 179 | {OpCode::Type::Xmad, &ShaderIR::DecodeXmad}, | ||
| 180 | }; | ||
| 181 | |||
| 182 | std::vector<Node> tmp_block; | ||
| 183 | if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) { | ||
| 184 | pc = (this->*decoder->second)(tmp_block, bb, pc); | ||
| 185 | } else { | ||
| 186 | pc = DecodeOther(tmp_block, bb, pc); | ||
| 187 | } | ||
| 188 | |||
| 189 | // Some instructions (like SSY) don't have a predicate field, they are always unconditionally | ||
| 190 | // executed. | ||
| 191 | const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId()); | ||
| 192 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 193 | |||
| 194 | if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) { | ||
| 195 | bb.push_back( | ||
| 196 | Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block))); | ||
| 197 | } else { | ||
| 198 | for (auto& node : tmp_block) { | ||
| 199 | bb.push_back(std::move(node)); | ||
| 200 | } | ||
| 201 | } | ||
| 202 | |||
| 203 | return pc + 1; | ||
| 204 | } | ||
| 205 | |||
| 206 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp new file mode 100644 index 000000000..51b8d55d4 --- /dev/null +++ b/src/video_core/shader/decode/arithmetic.cpp | |||
| @@ -0,0 +1,155 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | using Tegra::Shader::SubOp; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | Node op_a = GetRegister(instr.gpr8); | ||
| 21 | |||
| 22 | Node op_b = [&]() -> Node { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return GetImmediate19(instr); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | |||
| 32 | switch (opcode->get().GetId()) { | ||
| 33 | case OpCode::Id::MOV_C: | ||
| 34 | case OpCode::Id::MOV_R: { | ||
| 35 | // MOV does not have neither 'abs' nor 'neg' bits. | ||
| 36 | SetRegister(bb, instr.gpr0, op_b); | ||
| 37 | break; | ||
| 38 | } | ||
| 39 | case OpCode::Id::FMUL_C: | ||
| 40 | case OpCode::Id::FMUL_R: | ||
| 41 | case OpCode::Id::FMUL_IMM: { | ||
| 42 | // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. | ||
| 43 | UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented", | ||
| 44 | instr.fmul.tab5cb8_2.Value()); | ||
| 45 | UNIMPLEMENTED_IF_MSG( | ||
| 46 | instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", | ||
| 47 | instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default | ||
| 48 | |||
| 49 | op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); | ||
| 50 | |||
| 51 | // TODO(Rodrigo): Should precise be used when there's a postfactor? | ||
| 52 | Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); | ||
| 53 | |||
| 54 | if (instr.fmul.postfactor != 0) { | ||
| 55 | auto postfactor = static_cast<s32>(instr.fmul.postfactor); | ||
| 56 | |||
| 57 | // Postfactor encoded as 3-bit 1's complement in instruction, interpreted with below | ||
| 58 | // logic. | ||
| 59 | if (postfactor >= 4) { | ||
| 60 | postfactor = 7 - postfactor; | ||
| 61 | } else { | ||
| 62 | postfactor = 0 - postfactor; | ||
| 63 | } | ||
| 64 | |||
| 65 | if (postfactor > 0) { | ||
| 66 | value = Operation(OperationCode::FMul, NO_PRECISE, value, | ||
| 67 | Immediate(static_cast<f32>(1 << postfactor))); | ||
| 68 | } else { | ||
| 69 | value = Operation(OperationCode::FDiv, NO_PRECISE, value, | ||
| 70 | Immediate(static_cast<f32>(1 << -postfactor))); | ||
| 71 | } | ||
| 72 | } | ||
| 73 | |||
| 74 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 75 | |||
| 76 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 77 | SetRegister(bb, instr.gpr0, value); | ||
| 78 | break; | ||
| 79 | } | ||
| 80 | case OpCode::Id::FADD_C: | ||
| 81 | case OpCode::Id::FADD_R: | ||
| 82 | case OpCode::Id::FADD_IMM: { | ||
| 83 | op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 84 | op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 85 | |||
| 86 | Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); | ||
| 87 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 88 | |||
| 89 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 90 | SetRegister(bb, instr.gpr0, value); | ||
| 91 | break; | ||
| 92 | } | ||
| 93 | case OpCode::Id::MUFU: { | ||
| 94 | op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 95 | |||
| 96 | Node value = [&]() { | ||
| 97 | switch (instr.sub_op) { | ||
| 98 | case SubOp::Cos: | ||
| 99 | return Operation(OperationCode::FCos, PRECISE, op_a); | ||
| 100 | case SubOp::Sin: | ||
| 101 | return Operation(OperationCode::FSin, PRECISE, op_a); | ||
| 102 | case SubOp::Ex2: | ||
| 103 | return Operation(OperationCode::FExp2, PRECISE, op_a); | ||
| 104 | case SubOp::Lg2: | ||
| 105 | return Operation(OperationCode::FLog2, PRECISE, op_a); | ||
| 106 | case SubOp::Rcp: | ||
| 107 | return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a); | ||
| 108 | case SubOp::Rsq: | ||
| 109 | return Operation(OperationCode::FInverseSqrt, PRECISE, op_a); | ||
| 110 | case SubOp::Sqrt: | ||
| 111 | return Operation(OperationCode::FSqrt, PRECISE, op_a); | ||
| 112 | default: | ||
| 113 | UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", | ||
| 114 | static_cast<unsigned>(instr.sub_op.Value())); | ||
| 115 | return Immediate(0); | ||
| 116 | } | ||
| 117 | }(); | ||
| 118 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 119 | |||
| 120 | SetRegister(bb, instr.gpr0, value); | ||
| 121 | break; | ||
| 122 | } | ||
| 123 | case OpCode::Id::FMNMX_C: | ||
| 124 | case OpCode::Id::FMNMX_R: | ||
| 125 | case OpCode::Id::FMNMX_IMM: { | ||
| 126 | op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 127 | op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 128 | |||
| 129 | const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0); | ||
| 130 | |||
| 131 | const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b); | ||
| 132 | const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b); | ||
| 133 | const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); | ||
| 134 | |||
| 135 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 136 | SetRegister(bb, instr.gpr0, value); | ||
| 137 | break; | ||
| 138 | } | ||
| 139 | case OpCode::Id::RRO_C: | ||
| 140 | case OpCode::Id::RRO_R: | ||
| 141 | case OpCode::Id::RRO_IMM: { | ||
| 142 | // Currently RRO is only implemented as a register move. | ||
| 143 | op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 144 | SetRegister(bb, instr.gpr0, op_b); | ||
| 145 | LOG_WARNING(HW_GPU, "RRO instruction is incomplete"); | ||
| 146 | break; | ||
| 147 | } | ||
| 148 | default: | ||
| 149 | UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); | ||
| 150 | } | ||
| 151 | |||
| 152 | return pc; | ||
| 153 | } | ||
| 154 | |||
| 155 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp new file mode 100644 index 000000000..37eef2bf2 --- /dev/null +++ b/src/video_core/shader/decode/arithmetic_half.cpp | |||
| @@ -0,0 +1,70 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | if (opcode->get().GetId() == OpCode::Id::HADD2_C || | ||
| 20 | opcode->get().GetId() == OpCode::Id::HADD2_R) { | ||
| 21 | UNIMPLEMENTED_IF(instr.alu_half.ftz != 0); | ||
| 22 | } | ||
| 23 | UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented"); | ||
| 24 | |||
| 25 | const bool negate_a = | ||
| 26 | opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; | ||
| 27 | const bool negate_b = | ||
| 28 | opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0; | ||
| 29 | |||
| 30 | const Node op_a = GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.alu_half.abs_a, negate_a); | ||
| 31 | |||
| 32 | // instr.alu_half.type_a | ||
| 33 | |||
| 34 | Node op_b = [&]() { | ||
| 35 | switch (opcode->get().GetId()) { | ||
| 36 | case OpCode::Id::HADD2_C: | ||
| 37 | case OpCode::Id::HMUL2_C: | ||
| 38 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 39 | case OpCode::Id::HADD2_R: | ||
| 40 | case OpCode::Id::HMUL2_R: | ||
| 41 | return GetRegister(instr.gpr20); | ||
| 42 | default: | ||
| 43 | UNREACHABLE(); | ||
| 44 | return Immediate(0); | ||
| 45 | } | ||
| 46 | }(); | ||
| 47 | op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); | ||
| 48 | |||
| 49 | Node value = [&]() { | ||
| 50 | MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a, instr.alu_half.type_b}}; | ||
| 51 | switch (opcode->get().GetId()) { | ||
| 52 | case OpCode::Id::HADD2_C: | ||
| 53 | case OpCode::Id::HADD2_R: | ||
| 54 | return Operation(OperationCode::HAdd, meta, op_a, op_b); | ||
| 55 | case OpCode::Id::HMUL2_C: | ||
| 56 | case OpCode::Id::HMUL2_R: | ||
| 57 | return Operation(OperationCode::HMul, meta, op_a, op_b); | ||
| 58 | default: | ||
| 59 | UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); | ||
| 60 | return Immediate(0); | ||
| 61 | } | ||
| 62 | }(); | ||
| 63 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); | ||
| 64 | |||
| 65 | SetRegister(bb, instr.gpr0, value); | ||
| 66 | |||
| 67 | return pc; | ||
| 68 | } | ||
| 69 | |||
| 70 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp new file mode 100644 index 000000000..7b4f7d284 --- /dev/null +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp | |||
| @@ -0,0 +1,51 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { | ||
| 20 | UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0); | ||
| 21 | } else { | ||
| 22 | UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); | ||
| 23 | } | ||
| 24 | UNIMPLEMENTED_IF_MSG(instr.alu_half_imm.saturate != 0, | ||
| 25 | "Half float immediate saturation not implemented"); | ||
| 26 | |||
| 27 | Node op_a = GetRegister(instr.gpr8); | ||
| 28 | op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); | ||
| 29 | |||
| 30 | const Node op_b = UnpackHalfImmediate(instr, true); | ||
| 31 | |||
| 32 | Node value = [&]() { | ||
| 33 | MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a}}; | ||
| 34 | switch (opcode->get().GetId()) { | ||
| 35 | case OpCode::Id::HADD2_IMM: | ||
| 36 | return Operation(OperationCode::HAdd, meta, op_a, op_b); | ||
| 37 | case OpCode::Id::HMUL2_IMM: | ||
| 38 | return Operation(OperationCode::HMul, meta, op_a, op_b); | ||
| 39 | default: | ||
| 40 | UNREACHABLE(); | ||
| 41 | return Immediate(0); | ||
| 42 | } | ||
| 43 | }(); | ||
| 44 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); | ||
| 45 | |||
| 46 | SetRegister(bb, instr.gpr0, value); | ||
| 47 | |||
| 48 | return pc; | ||
| 49 | } | ||
| 50 | |||
| 51 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp new file mode 100644 index 000000000..4fd3db54e --- /dev/null +++ b/src/video_core/shader/decode/arithmetic_immediate.cpp | |||
| @@ -0,0 +1,52 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | switch (opcode->get().GetId()) { | ||
| 20 | case OpCode::Id::MOV32_IMM: { | ||
| 21 | SetRegister(bb, instr.gpr0, GetImmediate32(instr)); | ||
| 22 | break; | ||
| 23 | } | ||
| 24 | case OpCode::Id::FMUL32_IMM: { | ||
| 25 | Node value = | ||
| 26 | Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr)); | ||
| 27 | value = GetSaturatedFloat(value, instr.fmul32.saturate); | ||
| 28 | |||
| 29 | SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); | ||
| 30 | SetRegister(bb, instr.gpr0, value); | ||
| 31 | break; | ||
| 32 | } | ||
| 33 | case OpCode::Id::FADD32I: { | ||
| 34 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a, | ||
| 35 | instr.fadd32i.negate_a); | ||
| 36 | const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b, | ||
| 37 | instr.fadd32i.negate_b); | ||
| 38 | |||
| 39 | const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); | ||
| 40 | SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); | ||
| 41 | SetRegister(bb, instr.gpr0, value); | ||
| 42 | break; | ||
| 43 | } | ||
| 44 | default: | ||
| 45 | UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}", | ||
| 46 | opcode->get().GetName()); | ||
| 47 | } | ||
| 48 | |||
| 49 | return pc; | ||
| 50 | } | ||
| 51 | |||
| 52 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp new file mode 100644 index 000000000..cc9a76a19 --- /dev/null +++ b/src/video_core/shader/decode/arithmetic_integer.cpp | |||
| @@ -0,0 +1,287 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::IAdd3Height; | ||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | using Tegra::Shader::Register; | ||
| 17 | |||
| 18 | u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 19 | const Instruction instr = {program_code[pc]}; | ||
| 20 | const auto opcode = OpCode::Decode(instr); | ||
| 21 | |||
| 22 | Node op_a = GetRegister(instr.gpr8); | ||
| 23 | Node op_b = [&]() { | ||
| 24 | if (instr.is_b_imm) { | ||
| 25 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 26 | } else if (instr.is_b_gpr) { | ||
| 27 | return GetRegister(instr.gpr20); | ||
| 28 | } else { | ||
| 29 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 30 | } | ||
| 31 | }(); | ||
| 32 | |||
| 33 | switch (opcode->get().GetId()) { | ||
| 34 | case OpCode::Id::IADD_C: | ||
| 35 | case OpCode::Id::IADD_R: | ||
| 36 | case OpCode::Id::IADD_IMM: { | ||
| 37 | UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented"); | ||
| 38 | |||
| 39 | op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); | ||
| 40 | op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); | ||
| 41 | |||
| 42 | const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); | ||
| 43 | |||
| 44 | SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc); | ||
| 45 | SetRegister(bb, instr.gpr0, value); | ||
| 46 | break; | ||
| 47 | } | ||
| 48 | case OpCode::Id::IADD3_C: | ||
| 49 | case OpCode::Id::IADD3_R: | ||
| 50 | case OpCode::Id::IADD3_IMM: { | ||
| 51 | Node op_c = GetRegister(instr.gpr39); | ||
| 52 | |||
| 53 | const auto ApplyHeight = [&](IAdd3Height height, Node value) { | ||
| 54 | switch (height) { | ||
| 55 | case IAdd3Height::None: | ||
| 56 | return value; | ||
| 57 | case IAdd3Height::LowerHalfWord: | ||
| 58 | return BitfieldExtract(value, 0, 16); | ||
| 59 | case IAdd3Height::UpperHalfWord: | ||
| 60 | return BitfieldExtract(value, 16, 16); | ||
| 61 | default: | ||
| 62 | UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", static_cast<u32>(height)); | ||
| 63 | return Immediate(0); | ||
| 64 | } | ||
| 65 | }; | ||
| 66 | |||
| 67 | if (opcode->get().GetId() == OpCode::Id::IADD3_R) { | ||
| 68 | op_a = ApplyHeight(instr.iadd3.height_a, op_a); | ||
| 69 | op_b = ApplyHeight(instr.iadd3.height_b, op_b); | ||
| 70 | op_c = ApplyHeight(instr.iadd3.height_c, op_c); | ||
| 71 | } | ||
| 72 | |||
| 73 | op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true); | ||
| 74 | op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true); | ||
| 75 | op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true); | ||
| 76 | |||
| 77 | const Node value = [&]() { | ||
| 78 | const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); | ||
| 79 | if (opcode->get().GetId() != OpCode::Id::IADD3_R) { | ||
| 80 | return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c); | ||
| 81 | } | ||
| 82 | const Node shifted = [&]() { | ||
| 83 | switch (instr.iadd3.mode) { | ||
| 84 | case Tegra::Shader::IAdd3Mode::RightShift: | ||
| 85 | // TODO(tech4me): According to | ||
| 86 | // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3 | ||
| 87 | // The addition between op_a and op_b should be done in uint33, more | ||
| 88 | // investigation required | ||
| 89 | return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab, | ||
| 90 | Immediate(16)); | ||
| 91 | case Tegra::Shader::IAdd3Mode::LeftShift: | ||
| 92 | return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab, | ||
| 93 | Immediate(16)); | ||
| 94 | default: | ||
| 95 | return add_ab; | ||
| 96 | } | ||
| 97 | }(); | ||
| 98 | return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c); | ||
| 99 | }(); | ||
| 100 | |||
| 101 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 102 | SetRegister(bb, instr.gpr0, value); | ||
| 103 | break; | ||
| 104 | } | ||
| 105 | case OpCode::Id::ISCADD_C: | ||
| 106 | case OpCode::Id::ISCADD_R: | ||
| 107 | case OpCode::Id::ISCADD_IMM: { | ||
| 108 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 109 | "Condition codes generation in ISCADD is not implemented"); | ||
| 110 | |||
| 111 | op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); | ||
| 112 | op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); | ||
| 113 | |||
| 114 | const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount)); | ||
| 115 | const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift); | ||
| 116 | const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b); | ||
| 117 | |||
| 118 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 119 | SetRegister(bb, instr.gpr0, value); | ||
| 120 | break; | ||
| 121 | } | ||
| 122 | case OpCode::Id::POPC_C: | ||
| 123 | case OpCode::Id::POPC_R: | ||
| 124 | case OpCode::Id::POPC_IMM: { | ||
| 125 | if (instr.popc.invert) { | ||
| 126 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); | ||
| 127 | } | ||
| 128 | const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b); | ||
| 129 | SetRegister(bb, instr.gpr0, value); | ||
| 130 | break; | ||
| 131 | } | ||
| 132 | case OpCode::Id::SEL_C: | ||
| 133 | case OpCode::Id::SEL_R: | ||
| 134 | case OpCode::Id::SEL_IMM: { | ||
| 135 | const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0); | ||
| 136 | const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b); | ||
| 137 | SetRegister(bb, instr.gpr0, value); | ||
| 138 | break; | ||
| 139 | } | ||
| 140 | case OpCode::Id::LOP_C: | ||
| 141 | case OpCode::Id::LOP_R: | ||
| 142 | case OpCode::Id::LOP_IMM: { | ||
| 143 | if (instr.alu.lop.invert_a) | ||
| 144 | op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); | ||
| 145 | if (instr.alu.lop.invert_b) | ||
| 146 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); | ||
| 147 | |||
| 148 | WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b, | ||
| 149 | instr.alu.lop.pred_result_mode, instr.alu.lop.pred48, | ||
| 150 | instr.generates_cc); | ||
| 151 | break; | ||
| 152 | } | ||
| 153 | case OpCode::Id::LOP3_C: | ||
| 154 | case OpCode::Id::LOP3_R: | ||
| 155 | case OpCode::Id::LOP3_IMM: { | ||
| 156 | const Node op_c = GetRegister(instr.gpr39); | ||
| 157 | const Node lut = [&]() { | ||
| 158 | if (opcode->get().GetId() == OpCode::Id::LOP3_R) { | ||
| 159 | return Immediate(instr.alu.lop3.GetImmLut28()); | ||
| 160 | } else { | ||
| 161 | return Immediate(instr.alu.lop3.GetImmLut48()); | ||
| 162 | } | ||
| 163 | }(); | ||
| 164 | |||
| 165 | WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc); | ||
| 166 | break; | ||
| 167 | } | ||
| 168 | case OpCode::Id::IMNMX_C: | ||
| 169 | case OpCode::Id::IMNMX_R: | ||
| 170 | case OpCode::Id::IMNMX_IMM: { | ||
| 171 | UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); | ||
| 172 | |||
| 173 | const bool is_signed = instr.imnmx.is_signed; | ||
| 174 | |||
| 175 | const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0); | ||
| 176 | const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b); | ||
| 177 | const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b); | ||
| 178 | const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); | ||
| 179 | |||
| 180 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 181 | SetRegister(bb, instr.gpr0, value); | ||
| 182 | break; | ||
| 183 | } | ||
| 184 | case OpCode::Id::LEA_R2: | ||
| 185 | case OpCode::Id::LEA_R1: | ||
| 186 | case OpCode::Id::LEA_IMM: | ||
| 187 | case OpCode::Id::LEA_RZ: | ||
| 188 | case OpCode::Id::LEA_HI: { | ||
| 189 | const auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> { | ||
| 190 | switch (opcode->get().GetId()) { | ||
| 191 | case OpCode::Id::LEA_R2: { | ||
| 192 | return {GetRegister(instr.gpr20), GetRegister(instr.gpr39), | ||
| 193 | Immediate(static_cast<u32>(instr.lea.r2.entry_a))}; | ||
| 194 | } | ||
| 195 | |||
| 196 | case OpCode::Id::LEA_R1: { | ||
| 197 | const bool neg = instr.lea.r1.neg != 0; | ||
| 198 | return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), | ||
| 199 | GetRegister(instr.gpr20), | ||
| 200 | Immediate(static_cast<u32>(instr.lea.r1.entry_a))}; | ||
| 201 | } | ||
| 202 | |||
| 203 | case OpCode::Id::LEA_IMM: { | ||
| 204 | const bool neg = instr.lea.imm.neg != 0; | ||
| 205 | return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), | ||
| 206 | GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), | ||
| 207 | Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; | ||
| 208 | } | ||
| 209 | |||
| 210 | case OpCode::Id::LEA_RZ: { | ||
| 211 | const bool neg = instr.lea.rz.neg != 0; | ||
| 212 | return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset), | ||
| 213 | GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), | ||
| 214 | Immediate(static_cast<u32>(instr.lea.rz.entry_a))}; | ||
| 215 | } | ||
| 216 | |||
| 217 | case OpCode::Id::LEA_HI: | ||
| 218 | default: | ||
| 219 | UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName()); | ||
| 220 | |||
| 221 | return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8), | ||
| 222 | Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; | ||
| 223 | } | ||
| 224 | }(); | ||
| 225 | |||
| 226 | UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex), | ||
| 227 | "Unhandled LEA Predicate"); | ||
| 228 | |||
| 229 | const Node shifted_c = | ||
| 230 | Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, Immediate(1), op_c); | ||
| 231 | const Node mul_bc = Operation(OperationCode::IMul, NO_PRECISE, op_b, shifted_c); | ||
| 232 | const Node value = Operation(OperationCode::IAdd, NO_PRECISE, op_a, mul_bc); | ||
| 233 | |||
| 234 | SetRegister(bb, instr.gpr0, value); | ||
| 235 | |||
| 236 | break; | ||
| 237 | } | ||
| 238 | default: | ||
| 239 | UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName()); | ||
| 240 | } | ||
| 241 | |||
| 242 | return pc; | ||
| 243 | } | ||
| 244 | |||
| 245 | void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, | ||
| 246 | Node imm_lut, bool sets_cc) { | ||
| 247 | constexpr u32 lop_iterations = 32; | ||
| 248 | const Node one = Immediate(1); | ||
| 249 | const Node two = Immediate(2); | ||
| 250 | |||
| 251 | Node value{}; | ||
| 252 | for (u32 i = 0; i < lop_iterations; ++i) { | ||
| 253 | const Node shift_amount = Immediate(i); | ||
| 254 | |||
| 255 | const Node a = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_c, shift_amount); | ||
| 256 | const Node pack_0 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, one); | ||
| 257 | |||
| 258 | const Node b = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_b, shift_amount); | ||
| 259 | const Node c = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, b, one); | ||
| 260 | const Node pack_1 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, c, one); | ||
| 261 | |||
| 262 | const Node d = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_a, shift_amount); | ||
| 263 | const Node e = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, d, one); | ||
| 264 | const Node pack_2 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, e, two); | ||
| 265 | |||
| 266 | const Node pack_01 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_0, pack_1); | ||
| 267 | const Node pack_012 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_01, pack_2); | ||
| 268 | |||
| 269 | const Node shifted_bit = | ||
| 270 | Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, imm_lut, pack_012); | ||
| 271 | const Node bit = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, shifted_bit, one); | ||
| 272 | |||
| 273 | const Node right = | ||
| 274 | Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, bit, shift_amount); | ||
| 275 | |||
| 276 | if (i > 0) { | ||
| 277 | value = Operation(OperationCode::IBitwiseOr, NO_PRECISE, value, right); | ||
| 278 | } else { | ||
| 279 | value = right; | ||
| 280 | } | ||
| 281 | } | ||
| 282 | |||
| 283 | SetInternalFlagsFromInteger(bb, value, sets_cc); | ||
| 284 | SetRegister(bb, dest, value); | ||
| 285 | } | ||
| 286 | |||
| 287 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp new file mode 100644 index 000000000..b26a6e473 --- /dev/null +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp | |||
| @@ -0,0 +1,96 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::LogicOperation; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | using Tegra::Shader::PredicateResultMode; | ||
| 17 | using Tegra::Shader::Register; | ||
| 18 | |||
| 19 | u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 20 | const Instruction instr = {program_code[pc]}; | ||
| 21 | const auto opcode = OpCode::Decode(instr); | ||
| 22 | |||
| 23 | Node op_a = GetRegister(instr.gpr8); | ||
| 24 | Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32)); | ||
| 25 | |||
| 26 | switch (opcode->get().GetId()) { | ||
| 27 | case OpCode::Id::IADD32I: { | ||
| 28 | UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented"); | ||
| 29 | |||
| 30 | op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true); | ||
| 31 | |||
| 32 | const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); | ||
| 33 | |||
| 34 | SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc); | ||
| 35 | SetRegister(bb, instr.gpr0, value); | ||
| 36 | break; | ||
| 37 | } | ||
| 38 | case OpCode::Id::LOP32I: { | ||
| 39 | if (instr.alu.lop32i.invert_a) | ||
| 40 | op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); | ||
| 41 | |||
| 42 | if (instr.alu.lop32i.invert_b) | ||
| 43 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); | ||
| 44 | |||
| 45 | WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b, | ||
| 46 | PredicateResultMode::None, Pred::UnusedIndex, instr.op_32.generates_cc); | ||
| 47 | break; | ||
| 48 | } | ||
| 49 | default: | ||
| 50 | UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}", | ||
| 51 | opcode->get().GetName()); | ||
| 52 | } | ||
| 53 | |||
| 54 | return pc; | ||
| 55 | } | ||
| 56 | |||
| 57 | void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op, | ||
| 58 | Node op_a, Node op_b, PredicateResultMode predicate_mode, | ||
| 59 | Pred predicate, bool sets_cc) { | ||
| 60 | const Node result = [&]() { | ||
| 61 | switch (logic_op) { | ||
| 62 | case LogicOperation::And: | ||
| 63 | return Operation(OperationCode::IBitwiseAnd, PRECISE, op_a, op_b); | ||
| 64 | case LogicOperation::Or: | ||
| 65 | return Operation(OperationCode::IBitwiseOr, PRECISE, op_a, op_b); | ||
| 66 | case LogicOperation::Xor: | ||
| 67 | return Operation(OperationCode::IBitwiseXor, PRECISE, op_a, op_b); | ||
| 68 | case LogicOperation::PassB: | ||
| 69 | return op_b; | ||
| 70 | default: | ||
| 71 | UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(logic_op)); | ||
| 72 | return Immediate(0); | ||
| 73 | } | ||
| 74 | }(); | ||
| 75 | |||
| 76 | SetInternalFlagsFromInteger(bb, result, sets_cc); | ||
| 77 | SetRegister(bb, dest, result); | ||
| 78 | |||
| 79 | // Write the predicate value depending on the predicate mode. | ||
| 80 | switch (predicate_mode) { | ||
| 81 | case PredicateResultMode::None: | ||
| 82 | // Do nothing. | ||
| 83 | return; | ||
| 84 | case PredicateResultMode::NotZero: { | ||
| 85 | // Set the predicate to true if the result is not zero. | ||
| 86 | const Node compare = Operation(OperationCode::LogicalINotEqual, result, Immediate(0)); | ||
| 87 | SetPredicate(bb, static_cast<u64>(predicate), compare); | ||
| 88 | break; | ||
| 89 | } | ||
| 90 | default: | ||
| 91 | UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", | ||
| 92 | static_cast<u32>(predicate_mode)); | ||
| 93 | } | ||
| 94 | } | ||
| 95 | |||
| 96 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp new file mode 100644 index 000000000..0734141b0 --- /dev/null +++ b/src/video_core/shader/decode/bfe.cpp | |||
| @@ -0,0 +1,49 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | UNIMPLEMENTED_IF(instr.bfe.negate_b); | ||
| 20 | |||
| 21 | Node op_a = GetRegister(instr.gpr8); | ||
| 22 | op_a = GetOperandAbsNegInteger(op_a, false, instr.bfe.negate_a, false); | ||
| 23 | |||
| 24 | switch (opcode->get().GetId()) { | ||
| 25 | case OpCode::Id::BFE_IMM: { | ||
| 26 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 27 | "Condition codes generation in BFE is not implemented"); | ||
| 28 | |||
| 29 | const Node inner_shift_imm = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue())); | ||
| 30 | const Node outer_shift_imm = | ||
| 31 | Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position)); | ||
| 32 | |||
| 33 | const Node inner_shift = | ||
| 34 | Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, inner_shift_imm); | ||
| 35 | const Node outer_shift = | ||
| 36 | Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm); | ||
| 37 | |||
| 38 | SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc); | ||
| 39 | SetRegister(bb, instr.gpr0, outer_shift); | ||
| 40 | break; | ||
| 41 | } | ||
| 42 | default: | ||
| 43 | UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName()); | ||
| 44 | } | ||
| 45 | |||
| 46 | return pc; | ||
| 47 | } | ||
| 48 | |||
| 49 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp new file mode 100644 index 000000000..942d6729d --- /dev/null +++ b/src/video_core/shader/decode/bfi.cpp | |||
| @@ -0,0 +1,41 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | const auto [base, packed_shift] = [&]() -> std::tuple<Node, Node> { | ||
| 20 | switch (opcode->get().GetId()) { | ||
| 21 | case OpCode::Id::BFI_IMM_R: | ||
| 22 | return {GetRegister(instr.gpr39), Immediate(instr.alu.GetSignedImm20_20())}; | ||
| 23 | default: | ||
| 24 | UNREACHABLE(); | ||
| 25 | return {Immediate(0), Immediate(0)}; | ||
| 26 | } | ||
| 27 | }(); | ||
| 28 | const Node insert = GetRegister(instr.gpr8); | ||
| 29 | const Node offset = BitfieldExtract(packed_shift, 0, 8); | ||
| 30 | const Node bits = BitfieldExtract(packed_shift, 8, 8); | ||
| 31 | |||
| 32 | const Node value = | ||
| 33 | Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits); | ||
| 34 | |||
| 35 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 36 | SetRegister(bb, instr.gpr0, value); | ||
| 37 | |||
| 38 | return pc; | ||
| 39 | } | ||
| 40 | |||
| 41 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp new file mode 100644 index 000000000..728a393a1 --- /dev/null +++ b/src/video_core/shader/decode/conversion.cpp | |||
| @@ -0,0 +1,149 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | using Tegra::Shader::Register; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | switch (opcode->get().GetId()) { | ||
| 21 | case OpCode::Id::I2I_R: { | ||
| 22 | UNIMPLEMENTED_IF(instr.conversion.selector); | ||
| 23 | |||
| 24 | const bool input_signed = instr.conversion.is_input_signed; | ||
| 25 | const bool output_signed = instr.conversion.is_output_signed; | ||
| 26 | |||
| 27 | Node value = GetRegister(instr.gpr20); | ||
| 28 | value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); | ||
| 29 | |||
| 30 | value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, instr.conversion.negate_a, | ||
| 31 | input_signed); | ||
| 32 | if (input_signed != output_signed) { | ||
| 33 | value = SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, value); | ||
| 34 | } | ||
| 35 | |||
| 36 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 37 | SetRegister(bb, instr.gpr0, value); | ||
| 38 | break; | ||
| 39 | } | ||
| 40 | case OpCode::Id::I2F_R: | ||
| 41 | case OpCode::Id::I2F_C: { | ||
| 42 | UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); | ||
| 43 | UNIMPLEMENTED_IF(instr.conversion.selector); | ||
| 44 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 45 | "Condition codes generation in I2F is not implemented"); | ||
| 46 | |||
| 47 | Node value = [&]() { | ||
| 48 | if (instr.is_b_gpr) { | ||
| 49 | return GetRegister(instr.gpr20); | ||
| 50 | } else { | ||
| 51 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 52 | } | ||
| 53 | }(); | ||
| 54 | const bool input_signed = instr.conversion.is_input_signed; | ||
| 55 | value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); | ||
| 56 | value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed); | ||
| 57 | value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); | ||
| 58 | value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); | ||
| 59 | |||
| 60 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 61 | SetRegister(bb, instr.gpr0, value); | ||
| 62 | break; | ||
| 63 | } | ||
| 64 | case OpCode::Id::F2F_R: | ||
| 65 | case OpCode::Id::F2F_C: { | ||
| 66 | UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); | ||
| 67 | UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); | ||
| 68 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 69 | "Condition codes generation in F2F is not implemented"); | ||
| 70 | |||
| 71 | Node value = [&]() { | ||
| 72 | if (instr.is_b_gpr) { | ||
| 73 | return GetRegister(instr.gpr20); | ||
| 74 | } else { | ||
| 75 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 76 | } | ||
| 77 | }(); | ||
| 78 | |||
| 79 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | ||
| 80 | |||
| 81 | value = [&]() { | ||
| 82 | switch (instr.conversion.f2f.rounding) { | ||
| 83 | case Tegra::Shader::F2fRoundingOp::None: | ||
| 84 | return value; | ||
| 85 | case Tegra::Shader::F2fRoundingOp::Round: | ||
| 86 | return Operation(OperationCode::FRoundEven, PRECISE, value); | ||
| 87 | case Tegra::Shader::F2fRoundingOp::Floor: | ||
| 88 | return Operation(OperationCode::FFloor, PRECISE, value); | ||
| 89 | case Tegra::Shader::F2fRoundingOp::Ceil: | ||
| 90 | return Operation(OperationCode::FCeil, PRECISE, value); | ||
| 91 | case Tegra::Shader::F2fRoundingOp::Trunc: | ||
| 92 | return Operation(OperationCode::FTrunc, PRECISE, value); | ||
| 93 | } | ||
| 94 | UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", | ||
| 95 | static_cast<u32>(instr.conversion.f2f.rounding.Value())); | ||
| 96 | return Immediate(0); | ||
| 97 | }(); | ||
| 98 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 99 | |||
| 100 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 101 | SetRegister(bb, instr.gpr0, value); | ||
| 102 | break; | ||
| 103 | } | ||
| 104 | case OpCode::Id::F2I_R: | ||
| 105 | case OpCode::Id::F2I_C: { | ||
| 106 | UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); | ||
| 107 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 108 | "Condition codes generation in F2I is not implemented"); | ||
| 109 | Node value = [&]() { | ||
| 110 | if (instr.is_b_gpr) { | ||
| 111 | return GetRegister(instr.gpr20); | ||
| 112 | } else { | ||
| 113 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 114 | } | ||
| 115 | }(); | ||
| 116 | |||
| 117 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | ||
| 118 | |||
| 119 | value = [&]() { | ||
| 120 | switch (instr.conversion.f2i.rounding) { | ||
| 121 | case Tegra::Shader::F2iRoundingOp::None: | ||
| 122 | return value; | ||
| 123 | case Tegra::Shader::F2iRoundingOp::Floor: | ||
| 124 | return Operation(OperationCode::FFloor, PRECISE, value); | ||
| 125 | case Tegra::Shader::F2iRoundingOp::Ceil: | ||
| 126 | return Operation(OperationCode::FCeil, PRECISE, value); | ||
| 127 | case Tegra::Shader::F2iRoundingOp::Trunc: | ||
| 128 | return Operation(OperationCode::FTrunc, PRECISE, value); | ||
| 129 | default: | ||
| 130 | UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}", | ||
| 131 | static_cast<u32>(instr.conversion.f2i.rounding.Value())); | ||
| 132 | return Immediate(0); | ||
| 133 | } | ||
| 134 | }(); | ||
| 135 | const bool is_signed = instr.conversion.is_output_signed; | ||
| 136 | value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value); | ||
| 137 | value = ConvertIntegerSize(value, instr.conversion.dest_size, is_signed); | ||
| 138 | |||
| 139 | SetRegister(bb, instr.gpr0, value); | ||
| 140 | break; | ||
| 141 | } | ||
| 142 | default: | ||
| 143 | UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); | ||
| 144 | } | ||
| 145 | |||
| 146 | return pc; | ||
| 147 | } | ||
| 148 | |||
| 149 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/decode_integer_set.cpp b/src/video_core/shader/decode/decode_integer_set.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/video_core/shader/decode/decode_integer_set.cpp | |||
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp new file mode 100644 index 000000000..52f39d3ff --- /dev/null +++ b/src/video_core/shader/decode/ffma.cpp | |||
| @@ -0,0 +1,59 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); | ||
| 20 | UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented", | ||
| 21 | instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO | ||
| 22 | UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", | ||
| 23 | instr.ffma.tab5980_1.Value()); | ||
| 24 | |||
| 25 | const Node op_a = GetRegister(instr.gpr8); | ||
| 26 | |||
| 27 | auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> { | ||
| 28 | switch (opcode->get().GetId()) { | ||
| 29 | case OpCode::Id::FFMA_CR: { | ||
| 30 | return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||
| 31 | GetRegister(instr.gpr39)}; | ||
| 32 | } | ||
| 33 | case OpCode::Id::FFMA_RR: | ||
| 34 | return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; | ||
| 35 | case OpCode::Id::FFMA_RC: { | ||
| 36 | return {GetRegister(instr.gpr39), | ||
| 37 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 38 | } | ||
| 39 | case OpCode::Id::FFMA_IMM: | ||
| 40 | return {GetImmediate19(instr), GetRegister(instr.gpr39)}; | ||
| 41 | default: | ||
| 42 | UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName()); | ||
| 43 | return {Immediate(0), Immediate(0)}; | ||
| 44 | } | ||
| 45 | }(); | ||
| 46 | |||
| 47 | op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b); | ||
| 48 | op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c); | ||
| 49 | |||
| 50 | Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c); | ||
| 51 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 52 | |||
| 53 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 54 | SetRegister(bb, instr.gpr0, value); | ||
| 55 | |||
| 56 | return pc; | ||
| 57 | } | ||
| 58 | |||
| 59 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp new file mode 100644 index 000000000..9f9da2278 --- /dev/null +++ b/src/video_core/shader/decode/float_set.cpp | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, | ||
| 20 | instr.fset.neg_a != 0); | ||
| 21 | |||
| 22 | Node op_b = [&]() { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return GetImmediate19(instr); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | |||
| 32 | op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0); | ||
| 33 | |||
| 34 | // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the | ||
| 35 | // condition is true, and to 0 otherwise. | ||
| 36 | const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0); | ||
| 37 | |||
| 38 | const OperationCode combiner = GetPredicateCombiner(instr.fset.op); | ||
| 39 | const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b); | ||
| 40 | |||
| 41 | const Node predicate = Operation(combiner, first_pred, second_pred); | ||
| 42 | |||
| 43 | const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1); | ||
| 44 | const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0); | ||
| 45 | const Node value = | ||
| 46 | Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); | ||
| 47 | |||
| 48 | if (instr.fset.bf) { | ||
| 49 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 50 | } else { | ||
| 51 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 52 | } | ||
| 53 | SetRegister(bb, instr.gpr0, value); | ||
| 54 | |||
| 55 | return pc; | ||
| 56 | } | ||
| 57 | |||
| 58 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp new file mode 100644 index 000000000..dd3aef6f2 --- /dev/null +++ b/src/video_core/shader/decode/float_set_predicate.cpp | |||
| @@ -0,0 +1,56 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | using Tegra::Shader::Pred; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, | ||
| 21 | instr.fsetp.neg_a != 0); | ||
| 22 | Node op_b = [&]() { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return GetImmediate19(instr); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | op_b = GetOperandAbsNegFloat(op_b, instr.fsetp.abs_b, false); | ||
| 32 | |||
| 33 | // We can't use the constant predicate as destination. | ||
| 34 | ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 35 | |||
| 36 | const Node predicate = GetPredicateComparisonFloat(instr.fsetp.cond, op_a, op_b); | ||
| 37 | const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); | ||
| 38 | |||
| 39 | const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); | ||
| 40 | const Node value = Operation(combiner, predicate, second_pred); | ||
| 41 | |||
| 42 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 43 | SetPredicate(bb, instr.fsetp.pred3, value); | ||
| 44 | |||
| 45 | if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 46 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | ||
| 47 | // if enabled | ||
| 48 | const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); | ||
| 49 | const Node second_value = Operation(combiner, negated_pred, second_pred); | ||
| 50 | SetPredicate(bb, instr.fsetp.pred0, second_value); | ||
| 51 | } | ||
| 52 | |||
| 53 | return pc; | ||
| 54 | } | ||
| 55 | |||
| 56 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp new file mode 100644 index 000000000..dfd7cb98f --- /dev/null +++ b/src/video_core/shader/decode/half_set.cpp | |||
| @@ -0,0 +1,67 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/engines/shader_bytecode.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | |||
| 17 | u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 18 | const Instruction instr = {program_code[pc]}; | ||
| 19 | const auto opcode = OpCode::Decode(instr); | ||
| 20 | |||
| 21 | UNIMPLEMENTED_IF(instr.hset2.ftz != 0); | ||
| 22 | |||
| 23 | // instr.hset2.type_a | ||
| 24 | // instr.hset2.type_b | ||
| 25 | Node op_a = GetRegister(instr.gpr8); | ||
| 26 | Node op_b = [&]() { | ||
| 27 | switch (opcode->get().GetId()) { | ||
| 28 | case OpCode::Id::HSET2_R: | ||
| 29 | return GetRegister(instr.gpr20); | ||
| 30 | default: | ||
| 31 | UNREACHABLE(); | ||
| 32 | return Immediate(0); | ||
| 33 | } | ||
| 34 | }(); | ||
| 35 | |||
| 36 | op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a); | ||
| 37 | op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b); | ||
| 38 | |||
| 39 | const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); | ||
| 40 | |||
| 41 | MetaHalfArithmetic meta{false, {instr.hset2.type_a, instr.hset2.type_b}}; | ||
| 42 | const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, meta, op_a, op_b); | ||
| 43 | |||
| 44 | const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); | ||
| 45 | |||
| 46 | // HSET2 operates on each half float in the pack. | ||
| 47 | std::array<Node, 2> values; | ||
| 48 | for (u32 i = 0; i < 2; ++i) { | ||
| 49 | const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff; | ||
| 50 | const Node true_value = Immediate(raw_value << (i * 16)); | ||
| 51 | const Node false_value = Immediate(0); | ||
| 52 | |||
| 53 | const Node comparison = | ||
| 54 | Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); | ||
| 55 | const Node predicate = Operation(combiner, comparison, second_pred); | ||
| 56 | |||
| 57 | values[i] = | ||
| 58 | Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value); | ||
| 59 | } | ||
| 60 | |||
| 61 | const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]); | ||
| 62 | SetRegister(bb, instr.gpr0, value); | ||
| 63 | |||
| 64 | return pc; | ||
| 65 | } | ||
| 66 | |||
| 67 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp new file mode 100644 index 000000000..53c44ae5a --- /dev/null +++ b/src/video_core/shader/decode/half_set_predicate.cpp | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | using Tegra::Shader::Pred; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); | ||
| 21 | |||
| 22 | Node op_a = GetRegister(instr.gpr8); | ||
| 23 | op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); | ||
| 24 | |||
| 25 | const Node op_b = [&]() { | ||
| 26 | switch (opcode->get().GetId()) { | ||
| 27 | case OpCode::Id::HSETP2_R: | ||
| 28 | return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, | ||
| 29 | instr.hsetp2.negate_b); | ||
| 30 | default: | ||
| 31 | UNREACHABLE(); | ||
| 32 | return Immediate(0); | ||
| 33 | } | ||
| 34 | }(); | ||
| 35 | |||
| 36 | // We can't use the constant predicate as destination. | ||
| 37 | ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 38 | |||
| 39 | const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); | ||
| 40 | |||
| 41 | const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); | ||
| 42 | const OperationCode pair_combiner = | ||
| 43 | instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2; | ||
| 44 | |||
| 45 | MetaHalfArithmetic meta = {false, {instr.hsetp2.type_a, instr.hsetp2.type_b}}; | ||
| 46 | const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b); | ||
| 47 | const Node first_pred = Operation(pair_combiner, comparison); | ||
| 48 | |||
| 49 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 50 | const Node value = Operation(combiner, first_pred, second_pred); | ||
| 51 | SetPredicate(bb, instr.hsetp2.pred3, value); | ||
| 52 | |||
| 53 | if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 54 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled | ||
| 55 | const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred); | ||
| 56 | SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred)); | ||
| 57 | } | ||
| 58 | |||
| 59 | return pc; | ||
| 60 | } | ||
| 61 | |||
| 62 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp new file mode 100644 index 000000000..43a0a9e10 --- /dev/null +++ b/src/video_core/shader/decode/hfma2.cpp | |||
| @@ -0,0 +1,77 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <tuple> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/engines/shader_bytecode.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::HalfPrecision; | ||
| 15 | using Tegra::Shader::HalfType; | ||
| 16 | using Tegra::Shader::Instruction; | ||
| 17 | using Tegra::Shader::OpCode; | ||
| 18 | |||
| 19 | u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 20 | const Instruction instr = {program_code[pc]}; | ||
| 21 | const auto opcode = OpCode::Decode(instr); | ||
| 22 | |||
| 23 | if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { | ||
| 24 | UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None); | ||
| 25 | } else { | ||
| 26 | UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None); | ||
| 27 | } | ||
| 28 | |||
| 29 | constexpr auto identity = HalfType::H0_H1; | ||
| 30 | |||
| 31 | const HalfType type_a = instr.hfma2.type_a; | ||
| 32 | const Node op_a = GetRegister(instr.gpr8); | ||
| 33 | |||
| 34 | bool neg_b{}, neg_c{}; | ||
| 35 | auto [saturate, type_b, op_b, type_c, | ||
| 36 | op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> { | ||
| 37 | switch (opcode->get().GetId()) { | ||
| 38 | case OpCode::Id::HFMA2_CR: | ||
| 39 | neg_b = instr.hfma2.negate_b; | ||
| 40 | neg_c = instr.hfma2.negate_c; | ||
| 41 | return {instr.hfma2.saturate, instr.hfma2.type_b, | ||
| 42 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||
| 43 | instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; | ||
| 44 | case OpCode::Id::HFMA2_RC: | ||
| 45 | neg_b = instr.hfma2.negate_b; | ||
| 46 | neg_c = instr.hfma2.negate_c; | ||
| 47 | return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), | ||
| 48 | instr.hfma2.type_b, | ||
| 49 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 50 | case OpCode::Id::HFMA2_RR: | ||
| 51 | neg_b = instr.hfma2.rr.negate_b; | ||
| 52 | neg_c = instr.hfma2.rr.negate_c; | ||
| 53 | return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20), | ||
| 54 | instr.hfma2.rr.type_c, GetRegister(instr.gpr39)}; | ||
| 55 | case OpCode::Id::HFMA2_IMM_R: | ||
| 56 | neg_c = instr.hfma2.negate_c; | ||
| 57 | return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true), | ||
| 58 | instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; | ||
| 59 | default: | ||
| 60 | return {false, identity, Immediate(0), identity, Immediate(0)}; | ||
| 61 | } | ||
| 62 | }(); | ||
| 63 | UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented"); | ||
| 64 | |||
| 65 | op_b = GetOperandAbsNegHalf(op_b, false, neg_b); | ||
| 66 | op_c = GetOperandAbsNegHalf(op_c, false, neg_c); | ||
| 67 | |||
| 68 | MetaHalfArithmetic meta{true, {type_a, type_b, type_c}}; | ||
| 69 | Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c); | ||
| 70 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); | ||
| 71 | |||
| 72 | SetRegister(bb, instr.gpr0, value); | ||
| 73 | |||
| 74 | return pc; | ||
| 75 | } | ||
| 76 | |||
| 77 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp new file mode 100644 index 000000000..16eb3985f --- /dev/null +++ b/src/video_core/shader/decode/integer_set.cpp | |||
| @@ -0,0 +1,50 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | const Node op_a = GetRegister(instr.gpr8); | ||
| 20 | const Node op_b = [&]() { | ||
| 21 | if (instr.is_b_imm) { | ||
| 22 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 23 | } else if (instr.is_b_gpr) { | ||
| 24 | return GetRegister(instr.gpr20); | ||
| 25 | } else { | ||
| 26 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 27 | } | ||
| 28 | }(); | ||
| 29 | |||
| 30 | // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the condition | ||
| 31 | // is true, and to 0 otherwise. | ||
| 32 | const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0); | ||
| 33 | const Node first_pred = | ||
| 34 | GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b); | ||
| 35 | |||
| 36 | const OperationCode combiner = GetPredicateCombiner(instr.iset.op); | ||
| 37 | |||
| 38 | const Node predicate = Operation(combiner, first_pred, second_pred); | ||
| 39 | |||
| 40 | const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1); | ||
| 41 | const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0); | ||
| 42 | const Node value = | ||
| 43 | Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); | ||
| 44 | |||
| 45 | SetRegister(bb, instr.gpr0, value); | ||
| 46 | |||
| 47 | return pc; | ||
| 48 | } | ||
| 49 | |||
| 50 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp new file mode 100644 index 000000000..daf97174b --- /dev/null +++ b/src/video_core/shader/decode/integer_set_predicate.cpp | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | using Tegra::Shader::Pred; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | const Node op_a = GetRegister(instr.gpr8); | ||
| 21 | |||
| 22 | const Node op_b = [&]() { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | |||
| 32 | // We can't use the constant predicate as destination. | ||
| 33 | ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 34 | |||
| 35 | const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0); | ||
| 36 | const Node predicate = | ||
| 37 | GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b); | ||
| 38 | |||
| 39 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 40 | const OperationCode combiner = GetPredicateCombiner(instr.isetp.op); | ||
| 41 | const Node value = Operation(combiner, predicate, second_pred); | ||
| 42 | SetPredicate(bb, instr.isetp.pred3, value); | ||
| 43 | |||
| 44 | if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 45 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled | ||
| 46 | const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); | ||
| 47 | SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred)); | ||
| 48 | } | ||
| 49 | |||
| 50 | return pc; | ||
| 51 | } | ||
| 52 | |||
| 53 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp new file mode 100644 index 000000000..3dd26da20 --- /dev/null +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -0,0 +1,771 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <vector> | ||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/engines/shader_bytecode.h" | ||
| 12 | #include "video_core/shader/shader_ir.h" | ||
| 13 | |||
| 14 | namespace VideoCommon::Shader { | ||
| 15 | |||
| 16 | using Tegra::Shader::Attribute; | ||
| 17 | using Tegra::Shader::Instruction; | ||
| 18 | using Tegra::Shader::OpCode; | ||
| 19 | using Tegra::Shader::Register; | ||
| 20 | using Tegra::Shader::TextureMiscMode; | ||
| 21 | using Tegra::Shader::TextureProcessMode; | ||
| 22 | using Tegra::Shader::TextureType; | ||
| 23 | |||
| 24 | static std::size_t GetCoordCount(TextureType texture_type) { | ||
| 25 | switch (texture_type) { | ||
| 26 | case TextureType::Texture1D: | ||
| 27 | return 1; | ||
| 28 | case TextureType::Texture2D: | ||
| 29 | return 2; | ||
| 30 | case TextureType::Texture3D: | ||
| 31 | case TextureType::TextureCube: | ||
| 32 | return 3; | ||
| 33 | default: | ||
| 34 | UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type)); | ||
| 35 | return 0; | ||
| 36 | } | ||
| 37 | } | ||
| 38 | |||
| 39 | u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 40 | const Instruction instr = {program_code[pc]}; | ||
| 41 | const auto opcode = OpCode::Decode(instr); | ||
| 42 | |||
| 43 | switch (opcode->get().GetId()) { | ||
| 44 | case OpCode::Id::LD_A: { | ||
| 45 | // Note: Shouldn't this be interp mode flat? As in no interpolation made. | ||
| 46 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | ||
| 47 | "Indirect attribute loads are not supported"); | ||
| 48 | UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, | ||
| 49 | "Unaligned attribute loads are not supported"); | ||
| 50 | |||
| 51 | Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, | ||
| 52 | Tegra::Shader::IpaSampleMode::Default}; | ||
| 53 | |||
| 54 | u64 next_element = instr.attribute.fmt20.element; | ||
| 55 | auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); | ||
| 56 | |||
| 57 | const auto LoadNextElement = [&](u32 reg_offset) { | ||
| 58 | const Node buffer = GetRegister(instr.gpr39); | ||
| 59 | const Node attribute = GetInputAttribute(static_cast<Attribute::Index>(next_index), | ||
| 60 | next_element, input_mode, buffer); | ||
| 61 | |||
| 62 | SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute); | ||
| 63 | |||
| 64 | // Load the next attribute element into the following register. If the element | ||
| 65 | // to load goes beyond the vec4 size, load the first element of the next | ||
| 66 | // attribute. | ||
| 67 | next_element = (next_element + 1) % 4; | ||
| 68 | next_index = next_index + (next_element == 0 ? 1 : 0); | ||
| 69 | }; | ||
| 70 | |||
| 71 | const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; | ||
| 72 | for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { | ||
| 73 | LoadNextElement(reg_offset); | ||
| 74 | } | ||
| 75 | break; | ||
| 76 | } | ||
| 77 | case OpCode::Id::LD_C: { | ||
| 78 | UNIMPLEMENTED_IF(instr.ld_c.unknown != 0); | ||
| 79 | |||
| 80 | Node index = GetRegister(instr.gpr8); | ||
| 81 | |||
| 82 | const Node op_a = | ||
| 83 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); | ||
| 84 | |||
| 85 | switch (instr.ld_c.type.Value()) { | ||
| 86 | case Tegra::Shader::UniformType::Single: | ||
| 87 | SetRegister(bb, instr.gpr0, op_a); | ||
| 88 | break; | ||
| 89 | |||
| 90 | case Tegra::Shader::UniformType::Double: { | ||
| 91 | const Node op_b = | ||
| 92 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); | ||
| 93 | |||
| 94 | SetTemporal(bb, 0, op_a); | ||
| 95 | SetTemporal(bb, 1, op_b); | ||
| 96 | SetRegister(bb, instr.gpr0, GetTemporal(0)); | ||
| 97 | SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1)); | ||
| 98 | break; | ||
| 99 | } | ||
| 100 | default: | ||
| 101 | UNIMPLEMENTED_MSG("Unhandled type: {}", static_cast<unsigned>(instr.ld_c.type.Value())); | ||
| 102 | } | ||
| 103 | break; | ||
| 104 | } | ||
| 105 | case OpCode::Id::LD_L: { | ||
| 106 | UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}", | ||
| 107 | static_cast<u32>(instr.ld_l.unknown.Value())); | ||
| 108 | |||
| 109 | const auto GetLmem = [&](s32 offset) { | ||
| 110 | ASSERT(offset % 4 == 0); | ||
| 111 | const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); | ||
| 112 | const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), | ||
| 113 | immediate_offset); | ||
| 114 | return GetLocalMemory(address); | ||
| 115 | }; | ||
| 116 | |||
| 117 | switch (instr.ldst_sl.type.Value()) { | ||
| 118 | case Tegra::Shader::StoreType::Bits32: | ||
| 119 | case Tegra::Shader::StoreType::Bits64: | ||
| 120 | case Tegra::Shader::StoreType::Bits128: { | ||
| 121 | const u32 count = [&]() { | ||
| 122 | switch (instr.ldst_sl.type.Value()) { | ||
| 123 | case Tegra::Shader::StoreType::Bits32: | ||
| 124 | return 1; | ||
| 125 | case Tegra::Shader::StoreType::Bits64: | ||
| 126 | return 2; | ||
| 127 | case Tegra::Shader::StoreType::Bits128: | ||
| 128 | return 4; | ||
| 129 | default: | ||
| 130 | UNREACHABLE(); | ||
| 131 | return 0; | ||
| 132 | } | ||
| 133 | }(); | ||
| 134 | for (u32 i = 0; i < count; ++i) | ||
| 135 | SetTemporal(bb, i, GetLmem(i * 4)); | ||
| 136 | for (u32 i = 0; i < count; ++i) | ||
| 137 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | ||
| 138 | break; | ||
| 139 | } | ||
| 140 | default: | ||
| 141 | UNIMPLEMENTED_MSG("LD_L Unhandled type: {}", | ||
| 142 | static_cast<u32>(instr.ldst_sl.type.Value())); | ||
| 143 | } | ||
| 144 | break; | ||
| 145 | } | ||
| 146 | case OpCode::Id::LDG: { | ||
| 147 | const u32 count = [&]() { | ||
| 148 | switch (instr.ldg.type) { | ||
| 149 | case Tegra::Shader::UniformType::Single: | ||
| 150 | return 1; | ||
| 151 | case Tegra::Shader::UniformType::Double: | ||
| 152 | return 2; | ||
| 153 | case Tegra::Shader::UniformType::Quad: | ||
| 154 | case Tegra::Shader::UniformType::UnsignedQuad: | ||
| 155 | return 4; | ||
| 156 | default: | ||
| 157 | UNIMPLEMENTED_MSG("Unimplemented LDG size!"); | ||
| 158 | return 1; | ||
| 159 | } | ||
| 160 | }(); | ||
| 161 | |||
| 162 | const Node addr_register = GetRegister(instr.gpr8); | ||
| 163 | const Node base_address = TrackCbuf(addr_register, code, static_cast<s64>(code.size())); | ||
| 164 | const auto cbuf = std::get_if<CbufNode>(base_address); | ||
| 165 | ASSERT(cbuf != nullptr); | ||
| 166 | const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); | ||
| 167 | ASSERT(cbuf_offset_imm != nullptr); | ||
| 168 | const auto cbuf_offset = cbuf_offset_imm->GetValue(); | ||
| 169 | |||
| 170 | bb.push_back(Comment( | ||
| 171 | fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); | ||
| 172 | |||
| 173 | const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; | ||
| 174 | used_global_memory_bases.insert(descriptor); | ||
| 175 | |||
| 176 | const Node immediate_offset = | ||
| 177 | Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value())); | ||
| 178 | const Node base_real_address = | ||
| 179 | Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register); | ||
| 180 | |||
| 181 | for (u32 i = 0; i < count; ++i) { | ||
| 182 | const Node it_offset = Immediate(i * 4); | ||
| 183 | const Node real_address = | ||
| 184 | Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset); | ||
| 185 | const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); | ||
| 186 | |||
| 187 | SetTemporal(bb, i, gmem); | ||
| 188 | } | ||
| 189 | for (u32 i = 0; i < count; ++i) { | ||
| 190 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | ||
| 191 | } | ||
| 192 | break; | ||
| 193 | } | ||
| 194 | case OpCode::Id::ST_A: { | ||
| 195 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | ||
| 196 | "Indirect attribute loads are not supported"); | ||
| 197 | UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, | ||
| 198 | "Unaligned attribute loads are not supported"); | ||
| 199 | |||
| 200 | u64 next_element = instr.attribute.fmt20.element; | ||
| 201 | auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); | ||
| 202 | |||
| 203 | const auto StoreNextElement = [&](u32 reg_offset) { | ||
| 204 | const auto dest = GetOutputAttribute(static_cast<Attribute::Index>(next_index), | ||
| 205 | next_element, GetRegister(instr.gpr39)); | ||
| 206 | const auto src = GetRegister(instr.gpr0.Value() + reg_offset); | ||
| 207 | |||
| 208 | bb.push_back(Operation(OperationCode::Assign, dest, src)); | ||
| 209 | |||
| 210 | // Load the next attribute element into the following register. If the element | ||
| 211 | // to load goes beyond the vec4 size, load the first element of the next | ||
| 212 | // attribute. | ||
| 213 | next_element = (next_element + 1) % 4; | ||
| 214 | next_index = next_index + (next_element == 0 ? 1 : 0); | ||
| 215 | }; | ||
| 216 | |||
| 217 | const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; | ||
| 218 | for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { | ||
| 219 | StoreNextElement(reg_offset); | ||
| 220 | } | ||
| 221 | |||
| 222 | break; | ||
| 223 | } | ||
| 224 | case OpCode::Id::ST_L: { | ||
| 225 | UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}", | ||
| 226 | static_cast<u32>(instr.st_l.unknown.Value())); | ||
| 227 | |||
| 228 | const auto GetLmemAddr = [&](s32 offset) { | ||
| 229 | ASSERT(offset % 4 == 0); | ||
| 230 | const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset); | ||
| 231 | return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); | ||
| 232 | }; | ||
| 233 | |||
| 234 | switch (instr.ldst_sl.type.Value()) { | ||
| 235 | case Tegra::Shader::StoreType::Bits128: | ||
| 236 | SetLocalMemory(bb, GetLmemAddr(12), GetRegister(instr.gpr0.Value() + 3)); | ||
| 237 | SetLocalMemory(bb, GetLmemAddr(8), GetRegister(instr.gpr0.Value() + 2)); | ||
| 238 | case Tegra::Shader::StoreType::Bits64: | ||
| 239 | SetLocalMemory(bb, GetLmemAddr(4), GetRegister(instr.gpr0.Value() + 1)); | ||
| 240 | case Tegra::Shader::StoreType::Bits32: | ||
| 241 | SetLocalMemory(bb, GetLmemAddr(0), GetRegister(instr.gpr0)); | ||
| 242 | break; | ||
| 243 | default: | ||
| 244 | UNIMPLEMENTED_MSG("ST_L Unhandled type: {}", | ||
| 245 | static_cast<u32>(instr.ldst_sl.type.Value())); | ||
| 246 | } | ||
| 247 | break; | ||
| 248 | } | ||
| 249 | case OpCode::Id::TEX: { | ||
| 250 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 251 | "AOFFI is not implemented"); | ||
| 252 | |||
| 253 | if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 254 | LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); | ||
| 255 | } | ||
| 256 | |||
| 257 | const TextureType texture_type{instr.tex.texture_type}; | ||
| 258 | const bool is_array = instr.tex.array != 0; | ||
| 259 | const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); | ||
| 260 | const auto process_mode = instr.tex.GetTextureProcessMode(); | ||
| 261 | WriteTexInstructionFloat( | ||
| 262 | bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); | ||
| 263 | break; | ||
| 264 | } | ||
| 265 | case OpCode::Id::TEXS: { | ||
| 266 | const TextureType texture_type{instr.texs.GetTextureType()}; | ||
| 267 | const bool is_array{instr.texs.IsArrayTexture()}; | ||
| 268 | const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); | ||
| 269 | const auto process_mode = instr.texs.GetTextureProcessMode(); | ||
| 270 | |||
| 271 | if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 272 | LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete"); | ||
| 273 | } | ||
| 274 | |||
| 275 | const Node4 components = | ||
| 276 | GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); | ||
| 277 | |||
| 278 | if (instr.texs.fp32_flag) { | ||
| 279 | WriteTexsInstructionFloat(bb, instr, components); | ||
| 280 | } else { | ||
| 281 | WriteTexsInstructionHalfFloat(bb, instr, components); | ||
| 282 | } | ||
| 283 | break; | ||
| 284 | } | ||
| 285 | case OpCode::Id::TLD4: { | ||
| 286 | ASSERT(instr.tld4.array == 0); | ||
| 287 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 288 | "AOFFI is not implemented"); | ||
| 289 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), | ||
| 290 | "NDV is not implemented"); | ||
| 291 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), | ||
| 292 | "PTP is not implemented"); | ||
| 293 | |||
| 294 | if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 295 | LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete"); | ||
| 296 | } | ||
| 297 | |||
| 298 | const auto texture_type = instr.tld4.texture_type.Value(); | ||
| 299 | const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); | ||
| 300 | const bool is_array = instr.tld4.array != 0; | ||
| 301 | WriteTexInstructionFloat(bb, instr, | ||
| 302 | GetTld4Code(instr, texture_type, depth_compare, is_array)); | ||
| 303 | break; | ||
| 304 | } | ||
| 305 | case OpCode::Id::TLD4S: { | ||
| 306 | UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 307 | "AOFFI is not implemented"); | ||
| 308 | |||
| 309 | if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 310 | LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); | ||
| 311 | } | ||
| 312 | |||
| 313 | const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); | ||
| 314 | const Node op_a = GetRegister(instr.gpr8); | ||
| 315 | const Node op_b = GetRegister(instr.gpr20); | ||
| 316 | |||
| 317 | std::vector<Node> coords; | ||
| 318 | |||
| 319 | // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. | ||
| 320 | if (depth_compare) { | ||
| 321 | // Note: TLD4S coordinate encoding works just like TEXS's | ||
| 322 | const Node op_y = GetRegister(instr.gpr8.Value() + 1); | ||
| 323 | coords.push_back(op_a); | ||
| 324 | coords.push_back(op_y); | ||
| 325 | coords.push_back(op_b); | ||
| 326 | } else { | ||
| 327 | coords.push_back(op_a); | ||
| 328 | coords.push_back(op_b); | ||
| 329 | } | ||
| 330 | const auto num_coords = static_cast<u32>(coords.size()); | ||
| 331 | coords.push_back(Immediate(static_cast<u32>(instr.tld4s.component))); | ||
| 332 | |||
| 333 | const auto& sampler = | ||
| 334 | GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); | ||
| 335 | |||
| 336 | Node4 values; | ||
| 337 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 338 | auto params = coords; | ||
| 339 | MetaTexture meta{sampler, element, num_coords}; | ||
| 340 | values[element] = | ||
| 341 | Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); | ||
| 342 | } | ||
| 343 | |||
| 344 | WriteTexsInstructionFloat(bb, instr, values); | ||
| 345 | break; | ||
| 346 | } | ||
| 347 | case OpCode::Id::TXQ: { | ||
| 348 | if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 349 | LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); | ||
| 350 | } | ||
| 351 | |||
| 352 | // TODO: The new commits on the texture refactor, change the way samplers work. | ||
| 353 | // Sadly, not all texture instructions specify the type of texture their sampler | ||
| 354 | // uses. This must be fixed at a later instance. | ||
| 355 | const auto& sampler = | ||
| 356 | GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); | ||
| 357 | |||
| 358 | u32 indexer = 0; | ||
| 359 | switch (instr.txq.query_type) { | ||
| 360 | case Tegra::Shader::TextureQueryType::Dimension: { | ||
| 361 | for (u32 element = 0; element < 4; ++element) { | ||
| 362 | if (instr.txq.IsComponentEnabled(element)) { | ||
| 363 | MetaTexture meta{sampler, element}; | ||
| 364 | const Node value = Operation(OperationCode::F4TextureQueryDimensions, | ||
| 365 | std::move(meta), GetRegister(instr.gpr8)); | ||
| 366 | SetTemporal(bb, indexer++, value); | ||
| 367 | } | ||
| 368 | } | ||
| 369 | for (u32 i = 0; i < indexer; ++i) { | ||
| 370 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | ||
| 371 | } | ||
| 372 | break; | ||
| 373 | } | ||
| 374 | default: | ||
| 375 | UNIMPLEMENTED_MSG("Unhandled texture query type: {}", | ||
| 376 | static_cast<u32>(instr.txq.query_type.Value())); | ||
| 377 | } | ||
| 378 | break; | ||
| 379 | } | ||
| 380 | case OpCode::Id::TMML: { | ||
| 381 | UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), | ||
| 382 | "NDV is not implemented"); | ||
| 383 | |||
| 384 | if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 385 | LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); | ||
| 386 | } | ||
| 387 | |||
| 388 | auto texture_type = instr.tmml.texture_type.Value(); | ||
| 389 | const bool is_array = instr.tmml.array != 0; | ||
| 390 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 391 | |||
| 392 | std::vector<Node> coords; | ||
| 393 | |||
| 394 | // TODO: Add coordinates for different samplers once other texture types are implemented. | ||
| 395 | switch (texture_type) { | ||
| 396 | case TextureType::Texture1D: | ||
| 397 | coords.push_back(GetRegister(instr.gpr8)); | ||
| 398 | break; | ||
| 399 | case TextureType::Texture2D: | ||
| 400 | coords.push_back(GetRegister(instr.gpr8.Value() + 0)); | ||
| 401 | coords.push_back(GetRegister(instr.gpr8.Value() + 1)); | ||
| 402 | break; | ||
| 403 | default: | ||
| 404 | UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type)); | ||
| 405 | |||
| 406 | // Fallback to interpreting as a 2D texture for now | ||
| 407 | coords.push_back(GetRegister(instr.gpr8.Value() + 0)); | ||
| 408 | coords.push_back(GetRegister(instr.gpr8.Value() + 1)); | ||
| 409 | texture_type = TextureType::Texture2D; | ||
| 410 | } | ||
| 411 | |||
| 412 | for (u32 element = 0; element < 2; ++element) { | ||
| 413 | auto params = coords; | ||
| 414 | MetaTexture meta_texture{sampler, element, static_cast<u32>(coords.size())}; | ||
| 415 | const Node value = | ||
| 416 | Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params)); | ||
| 417 | SetTemporal(bb, element, value); | ||
| 418 | } | ||
| 419 | for (u32 element = 0; element < 2; ++element) { | ||
| 420 | SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element)); | ||
| 421 | } | ||
| 422 | |||
| 423 | break; | ||
| 424 | } | ||
| 425 | case OpCode::Id::TLDS: { | ||
| 426 | const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; | ||
| 427 | const bool is_array{instr.tlds.IsArrayTexture()}; | ||
| 428 | |||
| 429 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 430 | "AOFFI is not implemented"); | ||
| 431 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); | ||
| 432 | |||
| 433 | if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 434 | LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); | ||
| 435 | } | ||
| 436 | |||
| 437 | WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); | ||
| 438 | break; | ||
| 439 | } | ||
| 440 | default: | ||
| 441 | UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); | ||
| 442 | } | ||
| 443 | |||
| 444 | return pc; | ||
| 445 | } | ||
| 446 | |||
| 447 | const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, | ||
| 448 | bool is_array, bool is_shadow) { | ||
| 449 | const auto offset = static_cast<std::size_t>(sampler.index.Value()); | ||
| 450 | |||
| 451 | // If this sampler has already been used, return the existing mapping. | ||
| 452 | const auto itr = | ||
| 453 | std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 454 | [&](const Sampler& entry) { return entry.GetOffset() == offset; }); | ||
| 455 | if (itr != used_samplers.end()) { | ||
| 456 | ASSERT(itr->GetType() == type && itr->IsArray() == is_array && | ||
| 457 | itr->IsShadow() == is_shadow); | ||
| 458 | return *itr; | ||
| 459 | } | ||
| 460 | |||
| 461 | // Otherwise create a new mapping for this sampler | ||
| 462 | const std::size_t next_index = used_samplers.size(); | ||
| 463 | const Sampler entry{offset, next_index, type, is_array, is_shadow}; | ||
| 464 | return *used_samplers.emplace(entry).first; | ||
| 465 | } | ||
| 466 | |||
| 467 | void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr, | ||
| 468 | const Node4& components) { | ||
| 469 | u32 dest_elem = 0; | ||
| 470 | for (u32 elem = 0; elem < 4; ++elem) { | ||
| 471 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 472 | // Skip disabled components | ||
| 473 | continue; | ||
| 474 | } | ||
| 475 | SetTemporal(bb, dest_elem++, components[elem]); | ||
| 476 | } | ||
| 477 | // After writing values in temporals, move them to the real registers | ||
| 478 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 479 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | ||
| 480 | } | ||
| 481 | } | ||
| 482 | |||
| 483 | void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr, | ||
| 484 | const Node4& components) { | ||
| 485 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle | ||
| 486 | // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 | ||
| 487 | |||
| 488 | u32 dest_elem = 0; | ||
| 489 | for (u32 component = 0; component < 4; ++component) { | ||
| 490 | if (!instr.texs.IsComponentEnabled(component)) | ||
| 491 | continue; | ||
| 492 | SetTemporal(bb, dest_elem++, components[component]); | ||
| 493 | } | ||
| 494 | |||
| 495 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 496 | if (i < 2) { | ||
| 497 | // Write the first two swizzle components to gpr0 and gpr0+1 | ||
| 498 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); | ||
| 499 | } else { | ||
| 500 | ASSERT(instr.texs.HasTwoDestinations()); | ||
| 501 | // Write the rest of the swizzle components to gpr28 and gpr28+1 | ||
| 502 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); | ||
| 503 | } | ||
| 504 | } | ||
| 505 | } | ||
| 506 | |||
| 507 | void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr, | ||
| 508 | const Node4& components) { | ||
| 509 | // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half | ||
| 510 | // float instruction). | ||
| 511 | |||
| 512 | Node4 values; | ||
| 513 | u32 dest_elem = 0; | ||
| 514 | for (u32 component = 0; component < 4; ++component) { | ||
| 515 | if (!instr.texs.IsComponentEnabled(component)) | ||
| 516 | continue; | ||
| 517 | values[dest_elem++] = components[component]; | ||
| 518 | } | ||
| 519 | if (dest_elem == 0) | ||
| 520 | return; | ||
| 521 | |||
| 522 | std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); | ||
| 523 | |||
| 524 | const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); | ||
| 525 | if (dest_elem <= 2) { | ||
| 526 | SetRegister(bb, instr.gpr0, first_value); | ||
| 527 | return; | ||
| 528 | } | ||
| 529 | |||
| 530 | SetTemporal(bb, 0, first_value); | ||
| 531 | SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); | ||
| 532 | |||
| 533 | SetRegister(bb, instr.gpr0, GetTemporal(0)); | ||
| 534 | SetRegister(bb, instr.gpr28, GetTemporal(1)); | ||
| 535 | } | ||
| 536 | |||
/// Shared back end for TEX/TEXS: builds the four per-element texture read
/// operations from an already-assembled coordinate list.
/// @param array_offset index into `coords` of the array-layer coordinate (only
///        meaningful when `is_array` is set)
/// @param bias_offset  register offset from gpr20 where lod/bias lives
/// @param coords       coordinate nodes; mutated here to append lod/bias
/// @return one node per destination element (x, y, z, w)
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                               TextureProcessMode process_mode, bool depth_compare, bool is_array,
                               std::size_t array_offset, std::size_t bias_offset,
                               std::vector<Node>&& coords) {
    // Combinations with no GL equivalent (3D array/shadow, cube array shadow).
    UNIMPLEMENTED_IF_MSG(
        (texture_type == TextureType::Texture3D && (is_array || depth_compare)) ||
            (texture_type == TextureType::TextureCube && is_array && depth_compare),
        "This method is not supported.");

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);

    // These process modes carry an explicit LOD (zero, or from a register).
    const bool lod_needed = process_mode == TextureProcessMode::LZ ||
                            process_mode == TextureProcessMode::LL ||
                            process_mode == TextureProcessMode::LLA;

    // LOD selection (either via bias or explicit textureLod) not supported in GL for
    // sampler2DArrayShadow and samplerCubeArrayShadow.
    const bool gl_lod_supported =
        !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) ||
          (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare));

    const OperationCode read_method =
        lod_needed && gl_lod_supported ? OperationCode::F4TextureLod : OperationCode::F4Texture;

    UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);

    std::optional<u32> array_offset_value;
    if (is_array)
        array_offset_value = static_cast<u32>(array_offset);

    // Count only the real coordinates; the lod/bias appended below is extra.
    const auto coords_count = static_cast<u32>(coords.size());

    if (process_mode != TextureProcessMode::None && gl_lod_supported) {
        if (process_mode == TextureProcessMode::LZ) {
            // LZ samples at LOD zero.
            coords.push_back(Immediate(0.0f));
        } else {
            // If present, lod or bias are always stored in the register indexed by the gpr20
            // field with an offset depending on the usage of the other registers
            coords.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
        }
    }

    // Emit one read operation per destination element.
    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto params = coords;
        MetaTexture meta{sampler, element, coords_count, array_offset_value};
        values[element] = Operation(read_method, std::move(meta), std::move(params));
    }

    return values;
}
| 588 | |||
/// Assembles the coordinate list for a TEX instruction and delegates to
/// GetTextureCode. Handles the TEX register layout: coordinates start at gpr8
/// (gpr8+1 when an array index is present), depth reference and lod/bias come
/// from the gpr20 area.
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
                           TextureProcessMode process_mode, bool depth_compare, bool is_array) {
    // LL/LB/LLA/LBA modes consume an extra register for lod/bias.
    const bool lod_bias_enabled =
        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);

    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        coords.push_back(GetRegister(coord_register + i));
    }
    // 1D.DC in opengl the 2nd component is ignored.
    if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
        coords.push_back(Immediate(0.0f));
    }
    // Remember where the array layer lands in the coordinate list so the back
    // end can find it.
    std::size_t array_offset{};
    if (is_array) {
        array_offset = coords.size();
        coords.push_back(GetRegister(array_register));
    }
    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20
        // or in the next register if lod or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
        coords.push_back(GetRegister(depth_register));
    }
    // Fill ignored coordinates
    while (coords.size() < total_coord_count) {
        coords.push_back(Immediate(0));
    }

    // TEX's lod/bias always sits at gpr20 itself, hence bias_offset = 0.
    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
                          0, std::move(coords));
}
| 628 | |||
/// Assembles the coordinate list for a TEXS instruction and delegates to
/// GetTextureCode. TEXS packs operands tighter than TEX: the last coordinate
/// may come from gpr20 depending on which optional operands are present.
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
                            TextureProcessMode process_mode, bool depth_compare, bool is_array) {
    // LL/LB modes consume an extra register for lod/bias.
    const bool lod_bias_enabled =
        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);

    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);
    // The last coordinate spills over to gpr20 unless lod/bias or depth-compare
    // already claim gpr20 and the coordinates fit in the gpr8 run.
    const u64 last_coord_register =
        (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
            ? static_cast<u64>(instr.gpr20.Value())
            : coord_register + 1;

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        // Only the final coordinate (when there is more than one) may be remapped.
        const bool last = (i == (coord_count - 1)) && (coord_count > 1);
        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
    }

    // Remember where the array layer lands in the coordinate list.
    std::size_t array_offset{};
    if (is_array) {
        array_offset = coords.size();
        coords.push_back(GetRegister(array_register));
    }
    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20
        // or in the next register if lod or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
        coords.push_back(GetRegister(depth_register));
    }
    // Fill ignored coordinates
    while (coords.size() < total_coord_count) {
        coords.push_back(Immediate(0));
    }

    // When a coordinate already occupies gpr20, lod/bias moves to gpr20 + 1.
    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
                          (coord_count > 2 ? 1 : 0), std::move(coords));
}
| 670 | |||
| 671 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | ||
| 672 | bool is_array) { | ||
| 673 | const std::size_t coord_count = GetCoordCount(texture_type); | ||
| 674 | const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); | ||
| 675 | const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); | ||
| 676 | |||
| 677 | // If enabled arrays index is always stored in the gpr8 field | ||
| 678 | const u64 array_register = instr.gpr8.Value(); | ||
| 679 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used | ||
| 680 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 681 | |||
| 682 | std::vector<Node> coords; | ||
| 683 | |||
| 684 | for (size_t i = 0; i < coord_count; ++i) { | ||
| 685 | coords.push_back(GetRegister(coord_register + i)); | ||
| 686 | } | ||
| 687 | std::optional<u32> array_offset; | ||
| 688 | if (is_array) { | ||
| 689 | array_offset = static_cast<u32>(coords.size()); | ||
| 690 | coords.push_back(GetRegister(array_register)); | ||
| 691 | } | ||
| 692 | |||
| 693 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); | ||
| 694 | |||
| 695 | Node4 values; | ||
| 696 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 697 | auto params = coords; | ||
| 698 | MetaTexture meta{sampler, element, static_cast<u32>(coords.size()), array_offset}; | ||
| 699 | values[element] = | ||
| 700 | Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); | ||
| 701 | } | ||
| 702 | |||
| 703 | return values; | ||
| 704 | } | ||
| 705 | |||
| 706 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { | ||
| 707 | const std::size_t type_coord_count = GetCoordCount(texture_type); | ||
| 708 | const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0); | ||
| 709 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; | ||
| 710 | |||
| 711 | // If enabled arrays index is always stored in the gpr8 field | ||
| 712 | const u64 array_register = instr.gpr8.Value(); | ||
| 713 | // if is array gpr20 is used | ||
| 714 | const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); | ||
| 715 | |||
| 716 | const u64 last_coord_register = | ||
| 717 | ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array | ||
| 718 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 719 | : coord_register + 1; | ||
| 720 | |||
| 721 | std::vector<Node> coords; | ||
| 722 | |||
| 723 | for (std::size_t i = 0; i < type_coord_count; ++i) { | ||
| 724 | const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); | ||
| 725 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | ||
| 726 | } | ||
| 727 | std::optional<u32> array_offset; | ||
| 728 | if (is_array) { | ||
| 729 | array_offset = static_cast<u32>(coords.size()); | ||
| 730 | coords.push_back(GetRegister(array_register)); | ||
| 731 | } | ||
| 732 | const auto coords_count = static_cast<u32>(coords.size()); | ||
| 733 | |||
| 734 | if (lod_enabled) { | ||
| 735 | // When lod is used always is in grp20 | ||
| 736 | coords.push_back(GetRegister(instr.gpr20)); | ||
| 737 | } else { | ||
| 738 | coords.push_back(Immediate(0)); | ||
| 739 | } | ||
| 740 | |||
| 741 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 742 | |||
| 743 | Node4 values; | ||
| 744 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 745 | auto params = coords; | ||
| 746 | MetaTexture meta{sampler, element, coords_count, array_offset}; | ||
| 747 | values[element] = | ||
| 748 | Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params)); | ||
| 749 | } | ||
| 750 | return values; | ||
| 751 | } | ||
| 752 | |||
| 753 | std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( | ||
| 754 | TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, | ||
| 755 | std::size_t max_coords, std::size_t max_inputs) { | ||
| 756 | const std::size_t coord_count = GetCoordCount(texture_type); | ||
| 757 | |||
| 758 | std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); | ||
| 759 | const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); | ||
| 760 | if (total_coord_count > max_coords || total_reg_count > max_inputs) { | ||
| 761 | UNIMPLEMENTED_MSG("Unsupported Texture operation"); | ||
| 762 | total_coord_count = std::min(total_coord_count, max_coords); | ||
| 763 | } | ||
| 764 | // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later. | ||
| 765 | total_coord_count += | ||
| 766 | (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; | ||
| 767 | |||
| 768 | return {coord_count, total_coord_count}; | ||
| 769 | } | ||
| 770 | |||
| 771 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp new file mode 100644 index 000000000..c1e5f4efb --- /dev/null +++ b/src/video_core/shader/decode/other.cpp | |||
| @@ -0,0 +1,178 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::ConditionCode; | ||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Register; | ||
| 16 | |||
/// Decodes miscellaneous instructions (flow control, system moves, geometry
/// emission) that do not belong to a dedicated decoder family.
/// @param bb   Basic block receiving the generated IR operations.
/// @param code Full decoded block (unused here, kept for decoder signature parity).
/// @param pc   Current program counter; may be advanced to end decoding early.
/// @return The (possibly updated) program counter.
u32 ShaderIR::DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    switch (opcode->get().GetId()) {
    case OpCode::Id::EXIT: {
        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}",
                             static_cast<u32>(cc));

        switch (instr.flow.cond) {
        case Tegra::Shader::FlowCondition::Always:
            bb.push_back(Operation(OperationCode::Exit));
            if (instr.pred.pred_index == static_cast<u64>(Tegra::Shader::Pred::UnusedIndex)) {
                // If this is an unconditional exit then just end processing here,
                // otherwise we have to account for the possibility of the condition
                // not being met, so continue processing the next instruction.
                pc = MAX_PROGRAM_LENGTH - 1;
            }
            break;

        case Tegra::Shader::FlowCondition::Fcsm_Tr:
            // TODO(bunnei): What is this used for? If we assume this condition is not
            // satisfied, dual vertex shaders in Farming Simulator make more sense
            UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
            break;

        default:
            UNIMPLEMENTED_MSG("Unhandled flow condition: {}",
                              static_cast<u32>(instr.flow.cond.Value()));
        }
        break;
    }
    case OpCode::Id::KIL: {
        // KIL discards the current fragment
        UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);

        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "KIL condition code used: {}",
                             static_cast<u32>(cc));

        bb.push_back(Operation(OperationCode::Discard));
        break;
    }
    case OpCode::Id::MOV_SYS: {
        // Moves a system variable into the destination register
        switch (instr.sys20) {
        case Tegra::Shader::SystemVariable::InvocationInfo: {
            LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
            SetRegister(bb, instr.gpr0, Immediate(0u));
            break;
        }
        case Tegra::Shader::SystemVariable::Ydirection: {
            // Config pack's third value is Y_NEGATE's state.
            SetRegister(bb, instr.gpr0, Operation(OperationCode::YNegate));
            break;
        }
        default:
            UNIMPLEMENTED_MSG("Unhandled system move: {}", static_cast<u32>(instr.sys20.Value()));
        }
        break;
    }
    case OpCode::Id::BRA: {
        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                             "BRA with constant buffers are not implemented");

        // Branch target is relative to the current program counter
        const u32 target = pc + instr.bra.GetBranchTarget();
        const Node branch = Operation(OperationCode::Branch, Immediate(target));

        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
        if (cc != Tegra::Shader::ConditionCode::T) {
            // Wrap the branch so it only executes when the condition code holds
            bb.push_back(Conditional(GetConditionCode(cc), {branch}));
        } else {
            bb.push_back(branch);
        }
        break;
    }
    case OpCode::Id::SSY: {
        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                             "Constant buffer flow is not supported");

        // The SSY opcode tells the GPU where to re-converge divergent execution paths, it sets the
        // target of the jump that the SYNC instruction will make. The SSY opcode has a similar
        // structure to the BRA opcode.
        const u32 target = pc + instr.bra.GetBranchTarget();
        bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target)));
        break;
    }
    case OpCode::Id::PBK: {
        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                             "Constant buffer PBK is not supported");

        // PBK pushes to a stack the address where BRK will jump to. This shares stack with SSY but
        // using SYNC on a PBK address will kill the shader execution. We don't emulate this because
        // it's very unlikely a driver will emit such invalid shader.
        const u32 target = pc + instr.bra.GetBranchTarget();
        bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target)));
        break;
    }
    case OpCode::Id::SYNC: {
        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",
                             static_cast<u32>(cc));

        // The SYNC opcode jumps to the address previously set by the SSY opcode
        bb.push_back(Operation(OperationCode::PopFlowStack));
        break;
    }
    case OpCode::Id::BRK: {
        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",
                             static_cast<u32>(cc));

        // The BRK opcode jumps to the address previously set by the PBK opcode
        bb.push_back(Operation(OperationCode::PopFlowStack));
        break;
    }
    case OpCode::Id::IPA: {
        // IPA interpolates an input attribute for the current fragment
        const auto& attribute = instr.attribute.fmt28;
        const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
                                                instr.ipa.sample_mode.Value()};

        const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
        const Node value = GetSaturatedFloat(attr, instr.ipa.saturate);

        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::OUT_R: {
        UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
                             "Stream buffer is not supported");

        if (instr.out.emit) {
            // gpr0 is used to store the next address and gpr8 contains the address to emit.
            // Hardware uses pointers here but we just ignore it
            bb.push_back(Operation(OperationCode::EmitVertex));
            SetRegister(bb, instr.gpr0, Immediate(0));
        }
        if (instr.out.cut) {
            bb.push_back(Operation(OperationCode::EndPrimitive));
        }
        break;
    }
    case OpCode::Id::ISBERD: {
        // Internal stage buffer entry read; only the pass-through form is handled
        UNIMPLEMENTED_IF(instr.isberd.o != 0);
        UNIMPLEMENTED_IF(instr.isberd.skew != 0);
        UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
        UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
        LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
        SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
        break;
    }
    case OpCode::Id::DEPBAR: {
        // Dependency barrier; no-op for the IR since dependencies are implicit here
        LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
    }

    return pc;
}
| 177 | |||
| 178 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp new file mode 100644 index 000000000..1717f0653 --- /dev/null +++ b/src/video_core/shader/decode/predicate_set_predicate.cpp | |||
| @@ -0,0 +1,67 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | using Tegra::Shader::Pred; | ||
| 15 | |||
/// Decodes predicate-combining instructions (PSETP, CSETP) that write
/// predicate registers from boolean combinations of predicates or the
/// condition-code state.
/// @param bb   Basic block receiving the generated IR operations.
/// @param code Full decoded block (unused here, kept for decoder signature parity).
/// @param pc   Current program counter.
/// @return The unchanged program counter.
u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    switch (opcode->get().GetId()) {
    case OpCode::Id::PSETP: {
        const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
        const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);

        // We can't use the constant predicate as destination.
        ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));

        const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);

        const OperationCode combiner = GetPredicateCombiner(instr.psetp.op);
        const Node predicate = Operation(combiner, op_a, op_b);

        // Set the primary predicate to the result of Predicate OP SecondPredicate
        SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred));

        if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
            // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if
            // enabled
            SetPredicate(bb, instr.psetp.pred0,
                         Operation(combiner, Operation(OperationCode::LogicalNegate, predicate),
                                   second_pred));
        }
        break;
    }
    case OpCode::Id::CSETP: {
        // CSETP combines a predicate with the current condition-code state
        const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
        const Node condition_code = GetConditionCode(instr.csetp.cc);

        const OperationCode combiner = GetPredicateCombiner(instr.csetp.op);

        if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) {
            SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred));
        }
        if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
            // The secondary predicate receives the negated condition code, combined
            const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code);
            SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred));
        }
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName());
    }

    return pc;
}
| 66 | |||
| 67 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp new file mode 100644 index 000000000..8bd15fb00 --- /dev/null +++ b/src/video_core/shader/decode/predicate_set_register.cpp | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 20 | "Condition codes generation in PSET is not implemented"); | ||
| 21 | |||
| 22 | const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0); | ||
| 23 | const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0); | ||
| 24 | const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b); | ||
| 25 | |||
| 26 | const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0); | ||
| 27 | |||
| 28 | const OperationCode combiner = GetPredicateCombiner(instr.pset.op); | ||
| 29 | const Node predicate = Operation(combiner, first_pred, second_pred); | ||
| 30 | |||
| 31 | const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff); | ||
| 32 | const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0); | ||
| 33 | const Node value = | ||
| 34 | Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); | ||
| 35 | |||
| 36 | if (instr.pset.bf) { | ||
| 37 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 38 | } else { | ||
| 39 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 40 | } | ||
| 41 | SetRegister(bb, instr.gpr0, value); | ||
| 42 | |||
| 43 | return pc; | ||
| 44 | } | ||
| 45 | |||
| 46 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp new file mode 100644 index 000000000..bdb4424a6 --- /dev/null +++ b/src/video_core/shader/decode/register_set_predicate.cpp | |||
| @@ -0,0 +1,51 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr); | ||
| 20 | |||
| 21 | const Node apply_mask = [&]() { | ||
| 22 | switch (opcode->get().GetId()) { | ||
| 23 | case OpCode::Id::R2P_IMM: | ||
| 24 | return Immediate(static_cast<u32>(instr.r2p.immediate_mask)); | ||
| 25 | default: | ||
| 26 | UNREACHABLE(); | ||
| 27 | return Immediate(static_cast<u32>(instr.r2p.immediate_mask)); | ||
| 28 | } | ||
| 29 | }(); | ||
| 30 | const Node mask = GetRegister(instr.gpr8); | ||
| 31 | const auto offset = static_cast<u32>(instr.r2p.byte) * 8; | ||
| 32 | |||
| 33 | constexpr u32 programmable_preds = 7; | ||
| 34 | for (u64 pred = 0; pred < programmable_preds; ++pred) { | ||
| 35 | const auto shift = static_cast<u32>(pred); | ||
| 36 | |||
| 37 | const Node apply_compare = BitfieldExtract(apply_mask, shift, 1); | ||
| 38 | const Node condition = | ||
| 39 | Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0)); | ||
| 40 | |||
| 41 | const Node value_compare = BitfieldExtract(mask, offset + shift, 1); | ||
| 42 | const Node value = Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0)); | ||
| 43 | |||
| 44 | const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value); | ||
| 45 | bb.push_back(Conditional(condition, {code})); | ||
| 46 | } | ||
| 47 | |||
| 48 | return pc; | ||
| 49 | } | ||
| 50 | |||
| 51 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp new file mode 100644 index 000000000..6623f8ff9 --- /dev/null +++ b/src/video_core/shader/decode/shift.cpp | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | const Node op_a = GetRegister(instr.gpr8); | ||
| 20 | const Node op_b = [&]() { | ||
| 21 | if (instr.is_b_imm) { | ||
| 22 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 23 | } else if (instr.is_b_gpr) { | ||
| 24 | return GetRegister(instr.gpr20); | ||
| 25 | } else { | ||
| 26 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 27 | } | ||
| 28 | }(); | ||
| 29 | |||
| 30 | switch (opcode->get().GetId()) { | ||
| 31 | case OpCode::Id::SHR_C: | ||
| 32 | case OpCode::Id::SHR_R: | ||
| 33 | case OpCode::Id::SHR_IMM: { | ||
| 34 | const Node value = SignedOperation(OperationCode::IArithmeticShiftRight, | ||
| 35 | instr.shift.is_signed, PRECISE, op_a, op_b); | ||
| 36 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 37 | SetRegister(bb, instr.gpr0, value); | ||
| 38 | break; | ||
| 39 | } | ||
| 40 | case OpCode::Id::SHL_C: | ||
| 41 | case OpCode::Id::SHL_R: | ||
| 42 | case OpCode::Id::SHL_IMM: { | ||
| 43 | const Node value = Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b); | ||
| 44 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 45 | SetRegister(bb, instr.gpr0, value); | ||
| 46 | break; | ||
| 47 | } | ||
| 48 | default: | ||
| 49 | UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); | ||
| 50 | } | ||
| 51 | |||
| 52 | return pc; | ||
| 53 | } | ||
| 54 | |||
| 55 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp new file mode 100644 index 000000000..c3432356d --- /dev/null +++ b/src/video_core/shader/decode/video.cpp | |||
| @@ -0,0 +1,111 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | using Tegra::Shader::Pred; | ||
| 15 | using Tegra::Shader::VideoType; | ||
| 16 | using Tegra::Shader::VmadShr; | ||
| 17 | |||
/// Decodes video (packed sub-word) instructions: VMAD and VSETP, which operate
/// on byte or half-word chunks extracted from registers.
/// @param bb   Basic block receiving the generated IR operations.
/// @param code Full decoded block (unused here, kept for decoder signature parity).
/// @param pc   Current program counter.
/// @return The unchanged program counter.
u32 ShaderIR::DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Operand A is always a sub-word extracted from gpr8
    const Node op_a =
        GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
                        instr.video.type_a, instr.video.byte_height_a);
    // Operand B is either another extracted register or a 16-bit immediate,
    // sign-extended when signed_b is set
    const Node op_b = [&]() {
        if (instr.video.use_register_b) {
            return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
                                   instr.video.signed_b, instr.video.type_b,
                                   instr.video.byte_height_b);
        }
        if (instr.video.signed_b) {
            const auto imm = static_cast<s16>(instr.alu.GetImm20_16());
            return Immediate(static_cast<u32>(imm));
        } else {
            return Immediate(instr.alu.GetImm20_16());
        }
    }();

    switch (opcode->get().GetId()) {
    case OpCode::Id::VMAD: {
        // VMAD computes a * b + c, optionally shifting the result right
        const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
        const Node op_c = GetRegister(instr.gpr39);

        Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b);
        value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c);

        if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) {
            const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15);
            value =
                SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift);
        }

        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::VSETP: {
        // VSETP compares the two operands and writes predicate registers
        // We can't use the constant predicate as destination.
        ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));

        const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1;
        const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b);
        const Node second_pred = GetPredicate(instr.vsetp.pred39, false);

        const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op);

        // Set the primary predicate to the result of Predicate OP SecondPredicate
        SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred));

        if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
            // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
            // if enabled
            const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred);
            SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred));
        }
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName());
    }

    return pc;
}
| 84 | |||
| 85 | Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, | ||
| 86 | Tegra::Shader::VideoType type, u64 byte_height) { | ||
| 87 | if (!is_chunk) { | ||
| 88 | return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8); | ||
| 89 | } | ||
| 90 | const Node zero = Immediate(0); | ||
| 91 | |||
| 92 | switch (type) { | ||
| 93 | case Tegra::Shader::VideoType::Size16_Low: | ||
| 94 | return BitfieldExtract(op, 0, 16); | ||
| 95 | case Tegra::Shader::VideoType::Size16_High: | ||
| 96 | return BitfieldExtract(op, 16, 16); | ||
| 97 | case Tegra::Shader::VideoType::Size32: | ||
| 98 | // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used | ||
| 99 | // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort. | ||
| 100 | UNIMPLEMENTED(); | ||
| 101 | return zero; | ||
| 102 | case Tegra::Shader::VideoType::Invalid: | ||
| 103 | UNREACHABLE_MSG("Invalid instruction encoding"); | ||
| 104 | return zero; | ||
| 105 | default: | ||
| 106 | UNREACHABLE(); | ||
| 107 | return zero; | ||
| 108 | } | ||
| 109 | } | ||
| 110 | |||
| 111 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp new file mode 100644 index 000000000..9cb864500 --- /dev/null +++ b/src/video_core/shader/decode/xmad.cpp | |||
| @@ -0,0 +1,98 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
/// Decodes XMAD: a 16x16 multiply-add whose operands are half-words selected
/// from registers, constant buffers or an immediate, with optional product
/// shifting, operand-C transformations and result merging.
/// @param bb   Basic block receiving the generated IR operations.
/// @param code Full decoded block (unused here, kept for decoder signature parity).
/// @param pc   Current program counter.
/// @return The unchanged program counter.
u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.xmad.sign_a);
    UNIMPLEMENTED_IF(instr.xmad.sign_b);
    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                         "Condition codes generation in XMAD is not implemented");

    Node op_a = GetRegister(instr.gpr8);

    // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
    UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
    const bool is_signed_a = instr.xmad.sign_a == 1;
    const bool is_signed_b = instr.xmad.sign_b == 1;
    const bool is_signed_c = is_signed_a;

    // Select operand B, operand C and the merge flag per instruction variant
    auto [is_merge, op_b, op_c] = [&]() -> std::tuple<bool, Node, Node> {
        switch (opcode->get().GetId()) {
        case OpCode::Id::XMAD_CR:
            return {instr.xmad.merge_56,
                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
                    GetRegister(instr.gpr39)};
        case OpCode::Id::XMAD_RR:
            return {instr.xmad.merge_37, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
        case OpCode::Id::XMAD_RC:
            return {false, GetRegister(instr.gpr39),
                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
        case OpCode::Id::XMAD_IMM:
            return {instr.xmad.merge_37, Immediate(static_cast<u32>(instr.xmad.imm20_16)),
                    GetRegister(instr.gpr39)};
        }
        UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
        return {false, Immediate(0), Immediate(0)};
    }();

    // Each multiply operand is the low or high half-word of its source
    op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16);

    const Node original_b = op_b;
    op_b = BitfieldExtract(op_b, instr.xmad.high_b ? 16 : 0, 16);

    // TODO(Rodrigo): Use an appropriate sign for this operation
    Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b);
    if (instr.xmad.product_shift_left) {
        product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
    }

    // Operand C may be used whole, as one of its half-words, or byte-carry
    // adjusted with the unextracted operand B (CBcc mode)
    const Node original_c = op_c;
    op_c = [&]() {
        switch (instr.xmad.mode) {
        case Tegra::Shader::XmadMode::None:
            return original_c;
        case Tegra::Shader::XmadMode::CLo:
            return BitfieldExtract(original_c, 0, 16);
        case Tegra::Shader::XmadMode::CHi:
            return BitfieldExtract(original_c, 16, 16);
        case Tegra::Shader::XmadMode::CBcc: {
            const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
                                                   NO_PRECISE, original_b, Immediate(16));
            return SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, original_c,
                                   shifted_b);
        }
        default:
            UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", static_cast<u32>(instr.xmad.mode.Value()));
            return Immediate(0);
        }
    }();

    // TODO(Rodrigo): Use an appropriate sign for this operation
    Node sum = Operation(OperationCode::IAdd, product, op_c);
    if (is_merge) {
        // Merge keeps the low half of the sum and the low half of B shifted high
        const Node a = BitfieldExtract(sum, 0, 16);
        const Node b =
            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, original_b, Immediate(16));
        sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b);
    }

    SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
    SetRegister(bb, instr.gpr0, sum);

    return pc;
}
| 97 | |||
| 98 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp new file mode 100644 index 000000000..d7747103e --- /dev/null +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -0,0 +1,444 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cmath> | ||
| 6 | #include <unordered_map> | ||
| 7 | |||
| 8 | #include "common/assert.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "common/logging/log.h" | ||
| 11 | #include "video_core/engines/shader_bytecode.h" | ||
| 12 | #include "video_core/shader/shader_ir.h" | ||
| 13 | |||
| 14 | namespace VideoCommon::Shader { | ||
| 15 | |||
| 16 | using Tegra::Shader::Attribute; | ||
| 17 | using Tegra::Shader::Instruction; | ||
| 18 | using Tegra::Shader::IpaMode; | ||
| 19 | using Tegra::Shader::Pred; | ||
| 20 | using Tegra::Shader::PredCondition; | ||
| 21 | using Tegra::Shader::PredOperation; | ||
| 22 | using Tegra::Shader::Register; | ||
| 23 | |||
| 24 | Node ShaderIR::StoreNode(NodeData&& node_data) { | ||
| 25 | auto store = std::make_unique<NodeData>(node_data); | ||
| 26 | const Node node = store.get(); | ||
| 27 | stored_nodes.push_back(std::move(store)); | ||
| 28 | return node; | ||
| 29 | } | ||
| 30 | |||
| 31 | Node ShaderIR::Conditional(Node condition, std::vector<Node>&& code) { | ||
| 32 | return StoreNode(ConditionalNode(condition, std::move(code))); | ||
| 33 | } | ||
| 34 | |||
| 35 | Node ShaderIR::Comment(const std::string& text) { | ||
| 36 | return StoreNode(CommentNode(text)); | ||
| 37 | } | ||
| 38 | |||
| 39 | Node ShaderIR::Immediate(u32 value) { | ||
| 40 | return StoreNode(ImmediateNode(value)); | ||
| 41 | } | ||
| 42 | |||
| 43 | Node ShaderIR::GetRegister(Register reg) { | ||
| 44 | if (reg != Register::ZeroIndex) { | ||
| 45 | used_registers.insert(static_cast<u32>(reg)); | ||
| 46 | } | ||
| 47 | return StoreNode(GprNode(reg)); | ||
| 48 | } | ||
| 49 | |||
| 50 | Node ShaderIR::GetImmediate19(Instruction instr) { | ||
| 51 | return Immediate(instr.alu.GetImm20_19()); | ||
| 52 | } | ||
| 53 | |||
| 54 | Node ShaderIR::GetImmediate32(Instruction instr) { | ||
| 55 | return Immediate(instr.alu.GetImm20_32()); | ||
| 56 | } | ||
| 57 | |||
| 58 | Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) { | ||
| 59 | const auto index = static_cast<u32>(index_); | ||
| 60 | const auto offset = static_cast<u32>(offset_); | ||
| 61 | |||
| 62 | const auto [entry, is_new] = used_cbufs.try_emplace(index); | ||
| 63 | entry->second.MarkAsUsed(offset); | ||
| 64 | |||
| 65 | return StoreNode(CbufNode(index, Immediate(offset))); | ||
| 66 | } | ||
| 67 | |||
| 68 | Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { | ||
| 69 | const auto index = static_cast<u32>(index_); | ||
| 70 | const auto offset = static_cast<u32>(offset_); | ||
| 71 | |||
| 72 | const auto [entry, is_new] = used_cbufs.try_emplace(index); | ||
| 73 | entry->second.MarkAsUsedIndirect(); | ||
| 74 | |||
| 75 | const Node final_offset = Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset)); | ||
| 76 | return StoreNode(CbufNode(index, final_offset)); | ||
| 77 | } | ||
| 78 | |||
| 79 | Node ShaderIR::GetPredicate(u64 pred_, bool negated) { | ||
| 80 | const auto pred = static_cast<Pred>(pred_); | ||
| 81 | if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) { | ||
| 82 | used_predicates.insert(pred); | ||
| 83 | } | ||
| 84 | |||
| 85 | return StoreNode(PredicateNode(pred, negated)); | ||
| 86 | } | ||
| 87 | |||
| 88 | Node ShaderIR::GetPredicate(bool immediate) { | ||
| 89 | return GetPredicate(static_cast<u64>(immediate ? Pred::UnusedIndex : Pred::NeverExecute)); | ||
| 90 | } | ||
| 91 | |||
| 92 | Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, | ||
| 93 | const Tegra::Shader::IpaMode& input_mode, Node buffer) { | ||
| 94 | const auto [entry, is_new] = | ||
| 95 | used_input_attributes.emplace(std::make_pair(index, std::set<Tegra::Shader::IpaMode>{})); | ||
| 96 | entry->second.insert(input_mode); | ||
| 97 | |||
| 98 | return StoreNode(AbufNode(index, static_cast<u32>(element), input_mode, buffer)); | ||
| 99 | } | ||
| 100 | |||
| 101 | Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { | ||
| 102 | if (index == Attribute::Index::ClipDistances0123 || | ||
| 103 | index == Attribute::Index::ClipDistances4567) { | ||
| 104 | const auto clip_index = | ||
| 105 | static_cast<u32>((index == Attribute::Index::ClipDistances4567 ? 1 : 0) + element); | ||
| 106 | used_clip_distances.at(clip_index) = true; | ||
| 107 | } | ||
| 108 | used_output_attributes.insert(index); | ||
| 109 | |||
| 110 | return StoreNode(AbufNode(index, static_cast<u32>(element), buffer)); | ||
| 111 | } | ||
| 112 | |||
| 113 | Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { | ||
| 114 | const Node node = StoreNode(InternalFlagNode(flag)); | ||
| 115 | if (negated) { | ||
| 116 | return Operation(OperationCode::LogicalNegate, node); | ||
| 117 | } | ||
| 118 | return node; | ||
| 119 | } | ||
| 120 | |||
| 121 | Node ShaderIR::GetLocalMemory(Node address) { | ||
| 122 | return StoreNode(LmemNode(address)); | ||
| 123 | } | ||
| 124 | |||
| 125 | Node ShaderIR::GetTemporal(u32 id) { | ||
| 126 | return GetRegister(Register::ZeroIndex + 1 + id); | ||
| 127 | } | ||
| 128 | |||
| 129 | Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { | ||
| 130 | if (absolute) { | ||
| 131 | value = Operation(OperationCode::FAbsolute, NO_PRECISE, value); | ||
| 132 | } | ||
| 133 | if (negate) { | ||
| 134 | value = Operation(OperationCode::FNegate, NO_PRECISE, value); | ||
| 135 | } | ||
| 136 | return value; | ||
| 137 | } | ||
| 138 | |||
| 139 | Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) { | ||
| 140 | if (!saturate) { | ||
| 141 | return value; | ||
| 142 | } | ||
| 143 | const Node positive_zero = Immediate(std::copysignf(0, 1)); | ||
| 144 | const Node positive_one = Immediate(1.0f); | ||
| 145 | return Operation(OperationCode::FClamp, NO_PRECISE, value, positive_zero, positive_one); | ||
| 146 | } | ||
| 147 | |||
| 148 | Node ShaderIR::ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed) { | ||
| 149 | switch (size) { | ||
| 150 | case Register::Size::Byte: | ||
| 151 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, | ||
| 152 | Immediate(24)); | ||
| 153 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, | ||
| 154 | Immediate(24)); | ||
| 155 | return value; | ||
| 156 | case Register::Size::Short: | ||
| 157 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, | ||
| 158 | Immediate(16)); | ||
| 159 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, | ||
| 160 | Immediate(16)); | ||
| 161 | case Register::Size::Word: | ||
| 162 | // Default - do nothing | ||
| 163 | return value; | ||
| 164 | default: | ||
| 165 | UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size)); | ||
| 166 | return value; | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) { | ||
| 171 | if (!is_signed) { | ||
| 172 | // Absolute or negate on an unsigned is pointless | ||
| 173 | return value; | ||
| 174 | } | ||
| 175 | if (absolute) { | ||
| 176 | value = Operation(OperationCode::IAbsolute, NO_PRECISE, value); | ||
| 177 | } | ||
| 178 | if (negate) { | ||
| 179 | value = Operation(OperationCode::INegate, NO_PRECISE, value); | ||
| 180 | } | ||
| 181 | return value; | ||
| 182 | } | ||
| 183 | |||
| 184 | Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { | ||
| 185 | const Node value = Immediate(instr.half_imm.PackImmediates()); | ||
| 186 | if (!has_negation) { | ||
| 187 | return value; | ||
| 188 | } | ||
| 189 | const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); | ||
| 190 | const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); | ||
| 191 | |||
| 192 | return Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, first_negate, second_negate); | ||
| 193 | } | ||
| 194 | |||
// Combines a half-float pair result (src) with the destination's previous
// contents (dest) according to the instruction's merge mode.
Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
    switch (merge) {
    case Tegra::Shader::HalfMerge::H0_H1:
        // Both halves come straight from the source; dest is ignored.
        return src;
    case Tegra::Shader::HalfMerge::F32:
        // Reinterpret the pair as a single 32-bit float.
        return Operation(OperationCode::HMergeF32, src);
    case Tegra::Shader::HalfMerge::Mrg_H0:
        // Merge one half from src into dest (which half is H0 vs H1 is defined
        // by the backend's HMergeH0/HMergeH1 implementation).
        return Operation(OperationCode::HMergeH0, dest, src);
    case Tegra::Shader::HalfMerge::Mrg_H1:
        return Operation(OperationCode::HMergeH1, dest, src);
    }
    UNREACHABLE();
    return src;
}
| 209 | |||
| 210 | Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { | ||
| 211 | if (absolute) { | ||
| 212 | value = Operation(OperationCode::HAbsolute, HALF_NO_PRECISE, value); | ||
| 213 | } | ||
| 214 | if (negate) { | ||
| 215 | value = Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, GetPredicate(true), | ||
| 216 | GetPredicate(true)); | ||
| 217 | } | ||
| 218 | return value; | ||
| 219 | } | ||
| 220 | |||
| 221 | Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { | ||
| 222 | static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { | ||
| 223 | {PredCondition::LessThan, OperationCode::LogicalFLessThan}, | ||
| 224 | {PredCondition::Equal, OperationCode::LogicalFEqual}, | ||
| 225 | {PredCondition::LessEqual, OperationCode::LogicalFLessEqual}, | ||
| 226 | {PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan}, | ||
| 227 | {PredCondition::NotEqual, OperationCode::LogicalFNotEqual}, | ||
| 228 | {PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual}, | ||
| 229 | {PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan}, | ||
| 230 | {PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual}, | ||
| 231 | {PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual}, | ||
| 232 | {PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan}, | ||
| 233 | {PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}}; | ||
| 234 | |||
| 235 | const auto comparison{PredicateComparisonTable.find(condition)}; | ||
| 236 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), | ||
| 237 | "Unknown predicate comparison operation"); | ||
| 238 | |||
| 239 | Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); | ||
| 240 | |||
| 241 | if (condition == PredCondition::LessThanWithNan || | ||
| 242 | condition == PredCondition::NotEqualWithNan || | ||
| 243 | condition == PredCondition::LessEqualWithNan || | ||
| 244 | condition == PredCondition::GreaterThanWithNan || | ||
| 245 | condition == PredCondition::GreaterEqualWithNan) { | ||
| 246 | |||
| 247 | predicate = Operation(OperationCode::LogicalOr, predicate, | ||
| 248 | Operation(OperationCode::LogicalFIsNan, op_a)); | ||
| 249 | predicate = Operation(OperationCode::LogicalOr, predicate, | ||
| 250 | Operation(OperationCode::LogicalFIsNan, op_b)); | ||
| 251 | } | ||
| 252 | |||
| 253 | return predicate; | ||
| 254 | } | ||
| 255 | |||
| 256 | Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, | ||
| 257 | Node op_b) { | ||
| 258 | static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { | ||
| 259 | {PredCondition::LessThan, OperationCode::LogicalILessThan}, | ||
| 260 | {PredCondition::Equal, OperationCode::LogicalIEqual}, | ||
| 261 | {PredCondition::LessEqual, OperationCode::LogicalILessEqual}, | ||
| 262 | {PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan}, | ||
| 263 | {PredCondition::NotEqual, OperationCode::LogicalINotEqual}, | ||
| 264 | {PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual}, | ||
| 265 | {PredCondition::LessThanWithNan, OperationCode::LogicalILessThan}, | ||
| 266 | {PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual}, | ||
| 267 | {PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual}, | ||
| 268 | {PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan}, | ||
| 269 | {PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}}; | ||
| 270 | |||
| 271 | const auto comparison{PredicateComparisonTable.find(condition)}; | ||
| 272 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), | ||
| 273 | "Unknown predicate comparison operation"); | ||
| 274 | |||
| 275 | Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, op_a, op_b); | ||
| 276 | |||
| 277 | UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || | ||
| 278 | condition == PredCondition::NotEqualWithNan || | ||
| 279 | condition == PredCondition::LessEqualWithNan || | ||
| 280 | condition == PredCondition::GreaterThanWithNan || | ||
| 281 | condition == PredCondition::GreaterEqualWithNan, | ||
| 282 | "NaN comparisons for integers are not implemented"); | ||
| 283 | return predicate; | ||
| 284 | } | ||
| 285 | |||
| 286 | Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, | ||
| 287 | const MetaHalfArithmetic& meta, Node op_a, Node op_b) { | ||
| 288 | |||
| 289 | UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || | ||
| 290 | condition == PredCondition::NotEqualWithNan || | ||
| 291 | condition == PredCondition::LessEqualWithNan || | ||
| 292 | condition == PredCondition::GreaterThanWithNan || | ||
| 293 | condition == PredCondition::GreaterEqualWithNan, | ||
| 294 | "Unimplemented NaN comparison for half floats"); | ||
| 295 | |||
| 296 | static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { | ||
| 297 | {PredCondition::LessThan, OperationCode::Logical2HLessThan}, | ||
| 298 | {PredCondition::Equal, OperationCode::Logical2HEqual}, | ||
| 299 | {PredCondition::LessEqual, OperationCode::Logical2HLessEqual}, | ||
| 300 | {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan}, | ||
| 301 | {PredCondition::NotEqual, OperationCode::Logical2HNotEqual}, | ||
| 302 | {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual}, | ||
| 303 | {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThan}, | ||
| 304 | {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqual}, | ||
| 305 | {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqual}, | ||
| 306 | {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThan}, | ||
| 307 | {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqual}}; | ||
| 308 | |||
| 309 | const auto comparison{PredicateComparisonTable.find(condition)}; | ||
| 310 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), | ||
| 311 | "Unknown predicate comparison operation"); | ||
| 312 | |||
| 313 | const Node predicate = Operation(comparison->second, meta, op_a, op_b); | ||
| 314 | |||
| 315 | return predicate; | ||
| 316 | } | ||
| 317 | |||
| 318 | OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { | ||
| 319 | static const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = { | ||
| 320 | {PredOperation::And, OperationCode::LogicalAnd}, | ||
| 321 | {PredOperation::Or, OperationCode::LogicalOr}, | ||
| 322 | {PredOperation::Xor, OperationCode::LogicalXor}, | ||
| 323 | }; | ||
| 324 | |||
| 325 | const auto op = PredicateOperationTable.find(operation); | ||
| 326 | UNIMPLEMENTED_IF_MSG(op == PredicateOperationTable.end(), "Unknown predicate operation"); | ||
| 327 | return op->second; | ||
| 328 | } | ||
| 329 | |||
| 330 | Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) { | ||
| 331 | switch (cc) { | ||
| 332 | case Tegra::Shader::ConditionCode::NEU: | ||
| 333 | return GetInternalFlag(InternalFlag::Zero, true); | ||
| 334 | default: | ||
| 335 | UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc)); | ||
| 336 | return GetPredicate(static_cast<u64>(Pred::NeverExecute)); | ||
| 337 | } | ||
| 338 | } | ||
| 339 | |||
| 340 | void ShaderIR::SetRegister(BasicBlock& bb, Register dest, Node src) { | ||
| 341 | bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src)); | ||
| 342 | } | ||
| 343 | |||
| 344 | void ShaderIR::SetPredicate(BasicBlock& bb, u64 dest, Node src) { | ||
| 345 | bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src)); | ||
| 346 | } | ||
| 347 | |||
| 348 | void ShaderIR::SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value) { | ||
| 349 | bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value)); | ||
| 350 | } | ||
| 351 | |||
| 352 | void ShaderIR::SetLocalMemory(BasicBlock& bb, Node address, Node value) { | ||
| 353 | bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value)); | ||
| 354 | } | ||
| 355 | |||
| 356 | void ShaderIR::SetTemporal(BasicBlock& bb, u32 id, Node value) { | ||
| 357 | SetRegister(bb, Register::ZeroIndex + 1 + id, value); | ||
| 358 | } | ||
| 359 | |||
| 360 | void ShaderIR::SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc) { | ||
| 361 | if (!sets_cc) { | ||
| 362 | return; | ||
| 363 | } | ||
| 364 | const Node zerop = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f)); | ||
| 365 | SetInternalFlag(bb, InternalFlag::Zero, zerop); | ||
| 366 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); | ||
| 367 | } | ||
| 368 | |||
| 369 | void ShaderIR::SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc) { | ||
| 370 | if (!sets_cc) { | ||
| 371 | return; | ||
| 372 | } | ||
| 373 | const Node zerop = Operation(OperationCode::LogicalIEqual, value, Immediate(0)); | ||
| 374 | SetInternalFlag(bb, InternalFlag::Zero, zerop); | ||
| 375 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); | ||
| 376 | } | ||
| 377 | |||
| 378 | Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { | ||
| 379 | return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset), | ||
| 380 | Immediate(bits)); | ||
| 381 | } | ||
| 382 | |||
| 383 | /*static*/ OperationCode ShaderIR::SignedToUnsignedCode(OperationCode operation_code, | ||
| 384 | bool is_signed) { | ||
| 385 | if (is_signed) { | ||
| 386 | return operation_code; | ||
| 387 | } | ||
| 388 | switch (operation_code) { | ||
| 389 | case OperationCode::FCastInteger: | ||
| 390 | return OperationCode::FCastUInteger; | ||
| 391 | case OperationCode::IAdd: | ||
| 392 | return OperationCode::UAdd; | ||
| 393 | case OperationCode::IMul: | ||
| 394 | return OperationCode::UMul; | ||
| 395 | case OperationCode::IDiv: | ||
| 396 | return OperationCode::UDiv; | ||
| 397 | case OperationCode::IMin: | ||
| 398 | return OperationCode::UMin; | ||
| 399 | case OperationCode::IMax: | ||
| 400 | return OperationCode::UMax; | ||
| 401 | case OperationCode::ICastFloat: | ||
| 402 | return OperationCode::UCastFloat; | ||
| 403 | case OperationCode::ICastUnsigned: | ||
| 404 | return OperationCode::UCastSigned; | ||
| 405 | case OperationCode::ILogicalShiftLeft: | ||
| 406 | return OperationCode::ULogicalShiftLeft; | ||
| 407 | case OperationCode::ILogicalShiftRight: | ||
| 408 | return OperationCode::ULogicalShiftRight; | ||
| 409 | case OperationCode::IArithmeticShiftRight: | ||
| 410 | return OperationCode::UArithmeticShiftRight; | ||
| 411 | case OperationCode::IBitwiseAnd: | ||
| 412 | return OperationCode::UBitwiseAnd; | ||
| 413 | case OperationCode::IBitwiseOr: | ||
| 414 | return OperationCode::UBitwiseOr; | ||
| 415 | case OperationCode::IBitwiseXor: | ||
| 416 | return OperationCode::UBitwiseXor; | ||
| 417 | case OperationCode::IBitwiseNot: | ||
| 418 | return OperationCode::UBitwiseNot; | ||
| 419 | case OperationCode::IBitfieldInsert: | ||
| 420 | return OperationCode::UBitfieldInsert; | ||
| 421 | case OperationCode::IBitCount: | ||
| 422 | return OperationCode::UBitCount; | ||
| 423 | case OperationCode::LogicalILessThan: | ||
| 424 | return OperationCode::LogicalULessThan; | ||
| 425 | case OperationCode::LogicalIEqual: | ||
| 426 | return OperationCode::LogicalUEqual; | ||
| 427 | case OperationCode::LogicalILessEqual: | ||
| 428 | return OperationCode::LogicalULessEqual; | ||
| 429 | case OperationCode::LogicalIGreaterThan: | ||
| 430 | return OperationCode::LogicalUGreaterThan; | ||
| 431 | case OperationCode::LogicalINotEqual: | ||
| 432 | return OperationCode::LogicalUNotEqual; | ||
| 433 | case OperationCode::LogicalIGreaterEqual: | ||
| 434 | return OperationCode::LogicalUGreaterEqual; | ||
| 435 | case OperationCode::INegate: | ||
| 436 | UNREACHABLE_MSG("Can't negate an unsigned integer"); | ||
| 437 | case OperationCode::IAbsolute: | ||
| 438 | UNREACHABLE_MSG("Can't apply absolute to an unsigned integer"); | ||
| 439 | } | ||
| 440 | UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code)); | ||
| 441 | return {}; | ||
| 442 | } | ||
| 443 | |||
| 444 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h new file mode 100644 index 000000000..6e42e3dfb --- /dev/null +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -0,0 +1,823 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <cstring> | ||
| 9 | #include <map> | ||
| 10 | #include <set> | ||
| 11 | #include <string> | ||
| 12 | #include <tuple> | ||
| 13 | #include <variant> | ||
| 14 | #include <vector> | ||
| 15 | |||
| 16 | #include "common/common_types.h" | ||
| 17 | #include "video_core/engines/maxwell_3d.h" | ||
| 18 | #include "video_core/engines/shader_bytecode.h" | ||
| 19 | #include "video_core/engines/shader_header.h" | ||
| 20 | |||
| 21 | namespace VideoCommon::Shader { | ||
| 22 | |||
| 23 | class OperationNode; | ||
| 24 | class ConditionalNode; | ||
| 25 | class GprNode; | ||
| 26 | class ImmediateNode; | ||
| 27 | class InternalFlagNode; | ||
| 28 | class PredicateNode; | ||
| 29 | class AbufNode; ///< Attribute buffer | ||
| 30 | class CbufNode; ///< Constant buffer | ||
| 31 | class LmemNode; ///< Local memory | ||
| 32 | class GmemNode; ///< Global memory | ||
| 33 | class CommentNode; | ||
| 34 | |||
| 35 | using ProgramCode = std::vector<u64>; | ||
| 36 | |||
| 37 | using NodeData = | ||
| 38 | std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode, | ||
| 39 | PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>; | ||
| 40 | using Node = const NodeData*; | ||
| 41 | using Node4 = std::array<Node, 4>; | ||
| 42 | using BasicBlock = std::vector<Node>; | ||
| 43 | |||
| 44 | constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; | ||
| 45 | |||
/// Typed opcodes of the shader intermediate representation. Each entry's
/// comment documents its operand signature as (inputs) -> result; Meta*
/// arguments carry per-operation metadata rather than runtime values.
enum class OperationCode {
    Assign, /// (float& dest, float src) -> void

    Select, /// (MetaArithmetic, bool pred, float a, float b) -> float

    FAdd,          /// (MetaArithmetic, float a, float b) -> float
    FMul,          /// (MetaArithmetic, float a, float b) -> float
    FDiv,          /// (MetaArithmetic, float a, float b) -> float
    FFma,          /// (MetaArithmetic, float a, float b, float c) -> float
    FNegate,       /// (MetaArithmetic, float a) -> float
    FAbsolute,     /// (MetaArithmetic, float a) -> float
    FClamp,        /// (MetaArithmetic, float value, float min, float max) -> float
    FMin,          /// (MetaArithmetic, float a, float b) -> float
    FMax,          /// (MetaArithmetic, float a, float b) -> float
    FCos,          /// (MetaArithmetic, float a) -> float
    FSin,          /// (MetaArithmetic, float a) -> float
    FExp2,         /// (MetaArithmetic, float a) -> float
    FLog2,         /// (MetaArithmetic, float a) -> float
    FInverseSqrt,  /// (MetaArithmetic, float a) -> float
    FSqrt,         /// (MetaArithmetic, float a) -> float
    FRoundEven,    /// (MetaArithmetic, float a) -> float
    FFloor,        /// (MetaArithmetic, float a) -> float
    FCeil,         /// (MetaArithmetic, float a) -> float
    FTrunc,        /// (MetaArithmetic, float a) -> float
    FCastInteger,  /// (MetaArithmetic, int a) -> float
    FCastUInteger, /// (MetaArithmetic, uint a) -> float

    IAdd,                  /// (MetaArithmetic, int a, int b) -> int
    IMul,                  /// (MetaArithmetic, int a, int b) -> int
    IDiv,                  /// (MetaArithmetic, int a, int b) -> int
    INegate,               /// (MetaArithmetic, int a) -> int
    IAbsolute,             /// (MetaArithmetic, int a) -> int
    IMin,                  /// (MetaArithmetic, int a, int b) -> int
    IMax,                  /// (MetaArithmetic, int a, int b) -> int
    ICastFloat,            /// (MetaArithmetic, float a) -> int
    ICastUnsigned,         /// (MetaArithmetic, uint a) -> int
    ILogicalShiftLeft,     /// (MetaArithmetic, int a, uint b) -> int
    ILogicalShiftRight,    /// (MetaArithmetic, int a, uint b) -> int
    IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int
    IBitwiseAnd,           /// (MetaArithmetic, int a, int b) -> int
    IBitwiseOr,            /// (MetaArithmetic, int a, int b) -> int
    IBitwiseXor,           /// (MetaArithmetic, int a, int b) -> int
    IBitwiseNot,           /// (MetaArithmetic, int a) -> int
    IBitfieldInsert,       /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int
    IBitfieldExtract,      /// (MetaArithmetic, int value, int offset, int offset) -> int
    IBitCount,             /// (MetaArithmetic, int) -> int

    UAdd,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UMul,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UDiv,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UMin,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UMax,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UCastFloat,            /// (MetaArithmetic, float a) -> uint
    UCastSigned,           /// (MetaArithmetic, int a) -> uint
    ULogicalShiftLeft,     /// (MetaArithmetic, uint a, uint b) -> uint
    ULogicalShiftRight,    /// (MetaArithmetic, uint a, uint b) -> uint
    UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseAnd,           /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseOr,            /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseXor,           /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseNot,           /// (MetaArithmetic, uint a) -> uint
    UBitfieldInsert,       /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
    UBitfieldExtract,      /// (MetaArithmetic, uint value, int offset, int offset) -> uint
    UBitCount,             /// (MetaArithmetic, uint) -> uint

    HAdd,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
    HMul,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
    HFma,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
    HAbsolute, /// (f16vec2 a) -> f16vec2
    HNegate,   /// (f16vec2 a, bool first, bool second) -> f16vec2
    HMergeF32, /// (f16vec2 src) -> float
    HMergeH0,  /// (f16vec2 dest, f16vec2 src) -> f16vec2
    HMergeH1,  /// (f16vec2 dest, f16vec2 src) -> f16vec2
    HPack2,    /// (float a, float b) -> f16vec2

    LogicalAssign, /// (bool& dst, bool src) -> void
    LogicalAnd,    /// (bool a, bool b) -> bool
    LogicalOr,     /// (bool a, bool b) -> bool
    LogicalXor,    /// (bool a, bool b) -> bool
    LogicalNegate, /// (bool a) -> bool
    LogicalPick2,  /// (bool2 pair, uint index) -> bool
    LogicalAll2,   /// (bool2 a) -> bool
    LogicalAny2,   /// (bool2 a) -> bool

    LogicalFLessThan,     /// (float a, float b) -> bool
    LogicalFEqual,        /// (float a, float b) -> bool
    LogicalFLessEqual,    /// (float a, float b) -> bool
    LogicalFGreaterThan,  /// (float a, float b) -> bool
    LogicalFNotEqual,     /// (float a, float b) -> bool
    LogicalFGreaterEqual, /// (float a, float b) -> bool
    LogicalFIsNan,        /// (float a) -> bool

    LogicalILessThan,     /// (int a, int b) -> bool
    LogicalIEqual,        /// (int a, int b) -> bool
    LogicalILessEqual,    /// (int a, int b) -> bool
    LogicalIGreaterThan,  /// (int a, int b) -> bool
    LogicalINotEqual,     /// (int a, int b) -> bool
    LogicalIGreaterEqual, /// (int a, int b) -> bool

    LogicalULessThan,     /// (uint a, uint b) -> bool
    LogicalUEqual,        /// (uint a, uint b) -> bool
    LogicalULessEqual,    /// (uint a, uint b) -> bool
    LogicalUGreaterThan,  /// (uint a, uint b) -> bool
    LogicalUNotEqual,     /// (uint a, uint b) -> bool
    LogicalUGreaterEqual, /// (uint a, uint b) -> bool

    Logical2HLessThan,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HEqual,        /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HLessEqual,    /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HGreaterThan,  /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HNotEqual,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2

    F4Texture,                /// (MetaTexture, float[N] coords, float[M] params) -> float4
    F4TextureLod,             /// (MetaTexture, float[N] coords, float[M] params) -> float4
    F4TextureGather,          /// (MetaTexture, float[N] coords, float[M] params) -> float4
    F4TextureQueryDimensions, /// (MetaTexture, float a) -> float4
    F4TextureQueryLod,        /// (MetaTexture, float[N] coords) -> float4
    F4TexelFetch,             /// (MetaTexture, int[N], int) -> float4

    Branch,        /// (uint branch_target) -> void
    PushFlowStack, /// (uint branch_target) -> void
    PopFlowStack,  /// () -> void
    Exit,          /// () -> void
    Discard,       /// () -> void

    EmitVertex,   /// () -> void
    EndPrimitive, /// () -> void

    YNegate, /// () -> float

    Amount, /// Not an opcode: number of entries in this enum.
};
| 179 | |||
/// Maxwell condition-code flags that arithmetic instructions can update.
enum class InternalFlag {
    Zero = 0,     ///< Result was zero.
    Sign = 1,     ///< Result was negative.
    Carry = 2,    ///< Carry out of the operation.
    Overflow = 3, ///< Signed overflow.
    Amount = 4,   ///< Number of flags; not a real flag.
};
| 187 | |||
/// Describes the behaviour of code path of a given entry point and a return point.
enum class ExitMethod {
    Undetermined, ///< Internal value. Only occurs when analyzing a JMP loop.
    AlwaysReturn, ///< All code paths reach the return point.
    Conditional,  ///< Code path reaches the return point or an END instruction conditionally.
    AlwaysEnd,    ///< All code paths reach an END instruction.
};
| 195 | |||
/// Describes a texture sampler as referenced by shader sampling instructions.
class Sampler {
public:
    explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
                     bool is_array, bool is_shadow)
        : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow} {}

    /// Returns the offset in TSC memory specified by the sampling instruction.
    std::size_t GetOffset() const {
        return offset;
    }

    /// Returns the value used to index into the generated GLSL sampler array.
    std::size_t GetIndex() const {
        return index;
    }

    /// Returns the type used to sample this texture (Texture2D, etc).
    Tegra::Shader::TextureType GetType() const {
        return type;
    }

    /// Returns true when the texture is sampled as an array texture.
    bool IsArray() const {
        return is_array;
    }

    /// Returns true when the texture is sampled as a depth (shadow) texture.
    bool IsShadow() const {
        return is_shadow;
    }

    /// Lexicographic ordering over all fields so samplers can live in ordered sets.
    bool operator<(const Sampler& rhs) const {
        return std::tie(offset, index, type, is_array, is_shadow) <
               std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_array, rhs.is_shadow);
    }

private:
    /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
    /// instruction.
    std::size_t offset{};
    std::size_t index{}; ///< Value used to index into the generated GLSL sampler array.
    Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
    bool is_array{};  ///< Whether the texture is being sampled as an array texture or not.
    bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
};
| 236 | |||
/// Tracks how a shader uses one constant buffer so a properly sized buffer can be bound.
class ConstBuffer {
public:
    /// Records a direct access at the given byte offset, growing the known size if needed.
    void MarkAsUsed(u64 offset) {
        max_offset = std::max(max_offset, static_cast<u32>(offset));
    }

    /// Records an access through a runtime-computed offset.
    void MarkAsUsedIndirect() {
        is_indirect = true;
    }

    /// Returns true when the buffer is addressed with runtime-computed offsets.
    bool IsIndirect() const {
        return is_indirect;
    }

    /// Returns the size in bytes: the highest directly accessed offset plus one element.
    u32 GetSize() const {
        return max_offset + sizeof(float);
    }

private:
    u32 max_offset{};   ///< Highest byte offset recorded by MarkAsUsed.
    bool is_indirect{}; ///< Set once any indirect access is recorded.
};
| 259 | |||
/// Identifies a global memory region by the constant buffer entry that holds its base address.
struct GlobalMemoryBase {
    u32 cbuf_index{};  ///< Constant buffer index holding the base address.
    u32 cbuf_offset{}; ///< Offset inside that constant buffer.

    /// Lexicographic ordering so bases can live in ordered sets.
    bool operator<(const GlobalMemoryBase& rhs) const {
        return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset);
    }
};
| 268 | |||
/// Metadata for float arithmetic operations; precise requests strict evaluation.
struct MetaArithmetic {
    bool precise{};
};
| 272 | |||
/// Metadata for half-float pair arithmetic operations.
struct MetaHalfArithmetic {
    bool precise{};
    /// How each of up to three f16x2 operands is unpacked; defaults to using both halves.
    std::array<Tegra::Shader::HalfType, 3> types = {Tegra::Shader::HalfType::H0_H1,
                                                    Tegra::Shader::HalfType::H0_H1,
                                                    Tegra::Shader::HalfType::H0_H1};
};
| 279 | |||
/// Metadata for texture operations.
struct MetaTexture {
    const Sampler& sampler; ///< Sampler to use; must outlive the operation node.
    u32 element{};          ///< Component of the result selected by the operation.
    u32 coords_count{};     ///< Number of coordinate operands.
    // NOTE(review): assumed to be the operand slot of the array layer, present only when
    // sampling an array texture — confirm against the texture decoders.
    std::optional<u32> array_index;
};
| 286 | |||
/// Prebuilt metadata for precise/imprecise arithmetic operations.
constexpr MetaArithmetic PRECISE = {true};
constexpr MetaArithmetic NO_PRECISE = {false};
constexpr MetaHalfArithmetic HALF_NO_PRECISE = {false};

/// Optional metadata attached to an OperationNode.
using Meta = std::variant<MetaArithmetic, MetaHalfArithmetic, MetaTexture>;
| 292 | |||
| 293 | /// Holds any kind of operation that can be done in the IR | ||
| 294 | class OperationNode final { | ||
| 295 | public: | ||
| 296 | template <typename... T> | ||
| 297 | explicit constexpr OperationNode(OperationCode code) : code{code}, meta{} {} | ||
| 298 | |||
| 299 | template <typename... T> | ||
| 300 | explicit constexpr OperationNode(OperationCode code, Meta&& meta) | ||
| 301 | : code{code}, meta{std::move(meta)} {} | ||
| 302 | |||
| 303 | template <typename... T> | ||
| 304 | explicit constexpr OperationNode(OperationCode code, const T*... operands) | ||
| 305 | : OperationNode(code, {}, operands...) {} | ||
| 306 | |||
| 307 | template <typename... T> | ||
| 308 | explicit constexpr OperationNode(OperationCode code, Meta&& meta, const T*... operands_) | ||
| 309 | : code{code}, meta{std::move(meta)} { | ||
| 310 | |||
| 311 | auto operands_list = {operands_...}; | ||
| 312 | for (auto& operand : operands_list) { | ||
| 313 | operands.push_back(operand); | ||
| 314 | } | ||
| 315 | } | ||
| 316 | |||
| 317 | explicit OperationNode(OperationCode code, Meta&& meta, std::vector<Node>&& operands) | ||
| 318 | : code{code}, meta{meta}, operands{std::move(operands)} {} | ||
| 319 | |||
| 320 | explicit OperationNode(OperationCode code, std::vector<Node>&& operands) | ||
| 321 | : code{code}, meta{}, operands{std::move(operands)} {} | ||
| 322 | |||
| 323 | OperationCode GetCode() const { | ||
| 324 | return code; | ||
| 325 | } | ||
| 326 | |||
| 327 | const Meta& GetMeta() const { | ||
| 328 | return meta; | ||
| 329 | } | ||
| 330 | |||
| 331 | std::size_t GetOperandsCount() const { | ||
| 332 | return operands.size(); | ||
| 333 | } | ||
| 334 | |||
| 335 | Node operator[](std::size_t operand_index) const { | ||
| 336 | return operands.at(operand_index); | ||
| 337 | } | ||
| 338 | |||
| 339 | private: | ||
| 340 | const OperationCode code; | ||
| 341 | const Meta meta; | ||
| 342 | std::vector<Node> operands; | ||
| 343 | }; | ||
| 344 | |||
/// Encloses inside any kind of node that returns a boolean conditionally-executed code
class ConditionalNode final {
public:
    /// @param condition Node evaluating to the guard boolean.
    /// @param code Nodes executed only when the condition holds.
    explicit ConditionalNode(Node condition, std::vector<Node>&& code)
        : condition{condition}, code{std::move(code)} {}

    /// Returns the guard condition node.
    Node GetCondition() const {
        return condition;
    }

    /// Returns the guarded code sequence.
    const std::vector<Node>& GetCode() const {
        return code;
    }

private:
    const Node condition; ///< Condition to be satisfied
    std::vector<Node> code; ///< Code to execute
};
| 363 | |||
/// A general purpose register
class GprNode final {
public:
    explicit constexpr GprNode(Tegra::Shader::Register index) : index{index} {}

    /// Returns the register index as a plain integer.
    u32 GetIndex() const {
        return static_cast<u32>(index);
    }

private:
    const Tegra::Shader::Register index;
};
| 376 | |||
/// A 32-bits value that represents an immediate value
class ImmediateNode final {
public:
    explicit constexpr ImmediateNode(u32 value) : value{value} {}

    /// Returns the raw 32-bit immediate (may encode an integer or float bit pattern).
    u32 GetValue() const {
        return value;
    }

private:
    const u32 value;
};
| 389 | |||
/// One of Maxwell's internal flags
class InternalFlagNode final {
public:
    explicit constexpr InternalFlagNode(InternalFlag flag) : flag{flag} {}

    /// Returns the internal flag this node reads.
    InternalFlag GetFlag() const {
        return flag;
    }

private:
    const InternalFlag flag;
};
| 402 | |||
/// A predicate register, it can be negated without additional nodes
class PredicateNode final {
public:
    explicit constexpr PredicateNode(Tegra::Shader::Pred index, bool negated)
        : index{index}, negated{negated} {}

    /// Returns the predicate register index.
    Tegra::Shader::Pred GetIndex() const {
        return index;
    }

    /// Returns true when the predicate value must be logically inverted on read.
    bool IsNegated() const {
        return negated;
    }

private:
    const Tegra::Shader::Pred index;
    const bool negated;
};
| 421 | |||
/// Attribute buffer memory (known as attributes or varyings in GLSL terms)
class AbufNode final {
public:
    /// Input-attribute form: carries the interpolation (IPA) mode used to read it.
    explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element,
                                const Tegra::Shader::IpaMode& input_mode, Node buffer = {})
        : input_mode{input_mode}, buffer{buffer}, index{index}, element{element} {}

    /// Form without interpolation info; input_mode is left value-initialized.
    explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element,
                                Node buffer = {})
        : input_mode{}, buffer{buffer}, index{index}, element{element} {}

    /// Returns the interpolation mode (value-initialized when built without one).
    Tegra::Shader::IpaMode GetInputMode() const {
        return input_mode;
    }

    /// Returns the attribute index.
    Tegra::Shader::Attribute::Index GetIndex() const {
        return index;
    }

    /// Returns the component (x/y/z/w) within the attribute.
    u32 GetElement() const {
        return element;
    }

    /// Returns the optional buffer node; may be a null Node when not supplied.
    Node GetBuffer() const {
        return buffer;
    }

private:
    const Tegra::Shader::IpaMode input_mode;
    const Node buffer;
    const Tegra::Shader::Attribute::Index index;
    const u32 element;
};
| 455 | |||
/// Constant buffer node, usually mapped to uniform buffers in GLSL
class CbufNode final {
public:
    /// @param index Constant buffer slot.
    /// @param offset Node evaluating to the byte offset inside the buffer.
    explicit constexpr CbufNode(u32 index, Node offset) : index{index}, offset{offset} {}

    /// Returns the constant buffer slot.
    u32 GetIndex() const {
        return index;
    }

    /// Returns the offset node (immediate or runtime-computed).
    Node GetOffset() const {
        return offset;
    }

private:
    const u32 index;
    const Node offset;
};
| 473 | |||
/// Local memory node
class LmemNode final {
public:
    explicit constexpr LmemNode(Node address) : address{address} {}

    /// Returns the node evaluating to the local memory address.
    Node GetAddress() const {
        return address;
    }

private:
    const Node address;
};
| 486 | |||
/// Global memory node
class GmemNode final {
public:
    /// @param real_address Node evaluating to the full effective address.
    /// @param base_address Node evaluating to the region's base address.
    /// @param descriptor Identifies the cbuf entry the base address was loaded from.
    explicit constexpr GmemNode(Node real_address, Node base_address,
                                const GlobalMemoryBase& descriptor)
        : real_address{real_address}, base_address{base_address}, descriptor{descriptor} {}

    /// Returns the full effective-address node.
    Node GetRealAddress() const {
        return real_address;
    }

    /// Returns the base-address node.
    Node GetBaseAddress() const {
        return base_address;
    }

    /// Returns the descriptor identifying which cbuf entry holds the base address.
    const GlobalMemoryBase& GetDescriptor() const {
        return descriptor;
    }

private:
    const Node real_address;
    const Node base_address;
    const GlobalMemoryBase descriptor;
};
| 511 | |||
/// Commentary, can be dropped
class CommentNode final {
public:
    explicit CommentNode(std::string text) : text{std::move(text)} {}

    /// Returns the comment text.
    const std::string& GetText() const {
        return text;
    }

private:
    std::string text;
};
| 524 | |||
/**
 * Decodes a Maxwell shader program into an intermediate representation (basic blocks of
 * Nodes) and collects resource-usage metadata — registers, predicates, attributes,
 * constant buffers, samplers, clip distances and global memory bases — which host shader
 * generators query through the getters below. Decoding happens eagerly in the constructor.
 */
class ShaderIR final {
public:
    /// @param program_code Raw shader code; the reference must outlive this object.
    /// @param main_offset Offset of the program entry point inside program_code.
    explicit ShaderIR(const ProgramCode& program_code, u32 main_offset)
        : program_code{program_code}, main_offset{main_offset} {

        Decode();
    }

    /// Returns the decoded basic blocks, keyed by their starting address.
    const std::map<u32, BasicBlock>& GetBasicBlocks() const {
        return basic_blocks;
    }

    /// Returns the indices of all general purpose registers used by the program.
    const std::set<u32>& GetRegisters() const {
        return used_registers;
    }

    /// Returns all predicate registers used by the program.
    const std::set<Tegra::Shader::Pred>& GetPredicates() const {
        return used_predicates;
    }

    /// Returns the used input attributes together with every IPA mode they are read with.
    const std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>&
    GetInputAttributes() const {
        return used_input_attributes;
    }

    /// Returns the used output attributes.
    const std::set<Tegra::Shader::Attribute::Index>& GetOutputAttributes() const {
        return used_output_attributes;
    }

    /// Returns usage information for each referenced constant buffer, keyed by slot.
    const std::map<u32, ConstBuffer>& GetConstantBuffers() const {
        return used_cbufs;
    }

    /// Returns the samplers referenced by texture instructions.
    const std::set<Sampler>& GetSamplers() const {
        return used_samplers;
    }

    /// Returns per-index flags telling which clip distances the program writes.
    const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances()
        const {
        return used_clip_distances;
    }

    /// Returns the global memory regions accessed by the program.
    const std::set<GlobalMemoryBase>& GetGlobalMemoryBases() const {
        return used_global_memory_bases;
    }

    /// Returns the decoded program length in bytes (instructions are 64 bits wide).
    std::size_t GetLength() const {
        return static_cast<std::size_t>(coverage_end * sizeof(u64));
    }

    /// Returns the shader program header.
    const Tegra::Shader::Header& GetHeader() const {
        return header;
    }

private:
    /// Decodes the whole program, filling basic_blocks and the usage collections.
    void Decode();

    /// Analyzes how the code between begin and end can exit; discovered branch targets are
    /// added to labels. Results are cached in exit_method_map.
    ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels);

    /// Decodes the instruction range between begin and end into a basic block.
    BasicBlock DecodeRange(u32 begin, u32 end);

    /**
     * Decodes a single instruction from Tegra to IR.
     * @param bb Basic block where the nodes will be written to.
     * @param pc Program counter. Offset to decode.
     * @return Next address to decode.
     */
    u32 DecodeInstr(BasicBlock& bb, u32 pc);

    // Per-instruction-category decoders. Each shares DecodeInstr's contract: append IR
    // nodes for the instruction at pc to bb and return the next address to decode.
    u32 DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc);
    u32 DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc);

    /// Internalizes node's data and returns a managed pointer to a clone of that node
    Node StoreNode(NodeData&& node_data);

    /// Creates a conditional node
    Node Conditional(Node condition, std::vector<Node>&& code);
    /// Creates a commentary
    Node Comment(const std::string& text);
    /// Creates an u32 immediate
    Node Immediate(u32 value);
    /// Creates a s32 immediate
    Node Immediate(s32 value) {
        return Immediate(static_cast<u32>(value));
    }
    /// Creates a f32 immediate
    Node Immediate(f32 value) {
        u32 integral;
        // Reinterpret the float's bit pattern as u32 without violating aliasing rules
        std::memcpy(&integral, &value, sizeof(u32));
        return Immediate(integral);
    }

    /// Generates a node for a passed register.
    Node GetRegister(Tegra::Shader::Register reg);
    /// Generates a node representing a 19-bit immediate value
    Node GetImmediate19(Tegra::Shader::Instruction instr);
    /// Generates a node representing a 32-bit immediate value
    Node GetImmediate32(Tegra::Shader::Instruction instr);
    /// Generates a node representing a constant buffer
    Node GetConstBuffer(u64 index, u64 offset);
    /// Generates a node representing a constant buffer with a variadic offset
    Node GetConstBufferIndirect(u64 index, u64 offset, Node node);
    /// Generates a node for a passed predicate. It can be optionally negated
    Node GetPredicate(u64 pred, bool negated = false);
    /// Generates a predicate node for an immediate true or false value
    Node GetPredicate(bool immediate);
    /// Generates a node representing an input attribute. Keeps track of used attributes.
    Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element,
                           const Tegra::Shader::IpaMode& input_mode, Node buffer = {});
    /// Generates a node representing an output attribute. Keeps track of used attributes.
    Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer);
    /// Generates a node representing an internal flag
    Node GetInternalFlag(InternalFlag flag, bool negated = false);
    /// Generates a node representing a local memory address
    Node GetLocalMemory(Node address);
    /// Generates a temporal, internally it uses a post-RZ register
    Node GetTemporal(u32 id);

    /// Sets a register. src value must be a number-evaluated node.
    void SetRegister(BasicBlock& bb, Tegra::Shader::Register dest, Node src);
    /// Sets a predicate. src value must be a bool-evaluated node
    void SetPredicate(BasicBlock& bb, u64 dest, Node src);
    /// Sets an internal flag. src value must be a bool-evaluated node
    void SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value);
    /// Sets a local memory address. address and value must be a number-evaluated node
    void SetLocalMemory(BasicBlock& bb, Node address, Node value);
    /// Sets a temporal. Internally it uses a post-RZ register
    void SetTemporal(BasicBlock& bb, u32 id, Node value);

    /// Sets internal flags from a float
    void SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc = true);
    /// Sets internal flags from an integer
    void SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc = true);

    /// Conditionally absolute/negated float. Absolute is applied first
    Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate);
    /// Conditionally saturates a float
    Node GetSaturatedFloat(Node value, bool saturate = true);

    /// Converts an integer to different sizes.
    Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed);
    /// Conditionally absolute/negated integer. Absolute is applied first
    Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed);

    /// Unpacks a half immediate from an instruction
    Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation);
    /// Merges a half pair into another value
    Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge);
    /// Conditionally absolute/negated half float pair. Absolute is applied first
    Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate);

    /// Returns a predicate comparing two floats
    Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
    /// Returns a predicate comparing two integers
    Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed,
                                       Node op_a, Node op_b);
    /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared
    Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition,
                                    const MetaHalfArithmetic& meta, Node op_a, Node op_b);

    /// Returns a predicate combiner operation
    OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);

    /// Returns a condition code evaluated from internal flags
    Node GetConditionCode(Tegra::Shader::ConditionCode cc);

    /// Accesses a texture sampler
    const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
                              Tegra::Shader::TextureType type, bool is_array, bool is_shadow);

    /// Extracts a sequence of bits from a node
    Node BitfieldExtract(Node value, u32 offset, u32 bits);

    // Helpers that write the register stores produced by texture instructions into a block.
    void WriteTexInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr,
                                  const Node4& components);

    void WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr,
                                   const Node4& components);
    void WriteTexsInstructionHalfFloat(BasicBlock& bb, Tegra::Shader::Instruction instr,
                                       const Node4& components);

    // Builders producing the four result components of the TEX/TEXS/TLD4/TLDS families.
    Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                     Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
                     bool is_array);

    Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                      Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
                      bool is_array);

    Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                      bool depth_compare, bool is_array);

    Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                      bool is_array);

    std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement(
        Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
        bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);

    Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                         Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
                         bool is_array, std::size_t array_offset, std::size_t bias_offset,
                         std::vector<Node>&& coords);

    Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
                         u64 byte_height);

    void WriteLogicOperation(BasicBlock& bb, Tegra::Shader::Register dest,
                             Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b,
                             Tegra::Shader::PredicateResultMode predicate_mode,
                             Tegra::Shader::Pred predicate, bool sets_cc);
    void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
                              Node op_c, Node imm_lut, bool sets_cc);

    /// Walks code backwards from cursor trying to resolve tracked to a constant buffer access
    /// with an immediate offset; returns nullptr when it cannot (implemented in track.cpp).
    Node TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor);

    /// Finds the last assignment to tracked at or before cursor; returns the assigned value
    /// and the cursor where it was found (implemented in track.cpp).
    std::pair<Node, s64> TrackRegister(const GprNode* tracked, const BasicBlock& code, s64 cursor);

    // Convenience wrappers that build an OperationNode and internalize it via StoreNode.
    template <typename... T>
    Node Operation(OperationCode code, const T*... operands) {
        return StoreNode(OperationNode(code, operands...));
    }

    template <typename... T>
    Node Operation(OperationCode code, Meta&& meta, const T*... operands) {
        return StoreNode(OperationNode(code, std::move(meta), operands...));
    }

    template <typename... T>
    Node Operation(OperationCode code, std::vector<Node>&& operands) {
        return StoreNode(OperationNode(code, std::move(operands)));
    }

    template <typename... T>
    Node Operation(OperationCode code, Meta&& meta, std::vector<Node>&& operands) {
        return StoreNode(OperationNode(code, std::move(meta), std::move(operands)));
    }

    template <typename... T>
    Node SignedOperation(OperationCode code, bool is_signed, const T*... operands) {
        return StoreNode(OperationNode(SignedToUnsignedCode(code, is_signed), operands...));
    }

    template <typename... T>
    Node SignedOperation(OperationCode code, bool is_signed, Meta&& meta, const T*... operands) {
        return StoreNode(
            OperationNode(SignedToUnsignedCode(code, is_signed), std::move(meta), operands...));
    }

    /// Maps a signed operation code to its unsigned counterpart when is_signed is false.
    static OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed);

    const ProgramCode& program_code; ///< Raw shader code being decoded (not owned).
    const u32 main_offset;           ///< Entry point offset inside program_code.

    // coverage_begin/coverage_end delimit the decoded instruction range (see GetLength).
    u32 coverage_begin{};
    u32 coverage_end{};
    /// Cache of Scan results, keyed by the scanned (begin, end) range.
    std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;

    std::map<u32, BasicBlock> basic_blocks; ///< Decoded blocks keyed by starting address.

    /// Owns every node created through StoreNode; Nodes elsewhere point into this storage.
    std::vector<std::unique_ptr<NodeData>> stored_nodes;

    // Resource-usage collections populated while decoding.
    std::set<u32> used_registers;
    std::set<Tegra::Shader::Pred> used_predicates;
    std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>
        used_input_attributes;
    std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
    std::map<u32, ConstBuffer> used_cbufs;
    std::set<Sampler> used_samplers;
    std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
    std::set<GlobalMemoryBase> used_global_memory_bases;

    Tegra::Shader::Header header; ///< Shader program header.
};
| 822 | |||
| 823 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp new file mode 100644 index 000000000..d6d29ee9f --- /dev/null +++ b/src/video_core/shader/track.cpp | |||
| @@ -0,0 +1,76 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <utility> | ||
| 7 | #include <variant> | ||
| 8 | |||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | namespace { | ||
| 14 | std::pair<Node, s64> FindOperation(const BasicBlock& code, s64 cursor, | ||
| 15 | OperationCode operation_code) { | ||
| 16 | for (; cursor >= 0; --cursor) { | ||
| 17 | const Node node = code[cursor]; | ||
| 18 | if (const auto operation = std::get_if<OperationNode>(node)) { | ||
| 19 | if (operation->GetCode() == operation_code) | ||
| 20 | return {node, cursor}; | ||
| 21 | } | ||
| 22 | } | ||
| 23 | return {}; | ||
| 24 | } | ||
| 25 | } // namespace | ||
| 26 | |||
// Tries to resolve the value feeding `tracked` back to a constant buffer access with an
// immediate offset, searching the block backwards from `cursor`. Returns nullptr on failure.
Node ShaderIR::TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor) {
    if (const auto cbuf = std::get_if<CbufNode>(tracked)) {
        // Cbuf found, but it has to be immediate
        return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr;
    }
    if (const auto gpr = std::get_if<GprNode>(tracked)) {
        if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
            // The zero register can never carry a constant buffer value
            return nullptr;
        }
        // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
        // register that it uses as operand
        const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
        if (!source) {
            // The register's last assignment could not be located
            return nullptr;
        }
        // Keep tracking through whatever value was assigned to the register
        return TrackCbuf(source, code, new_cursor);
    }
    if (const auto operation = std::get_if<OperationNode>(tracked)) {
        // Recurse into every operand looking for a constant buffer access
        for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) {
            if (const auto found = TrackCbuf((*operation)[i], code, cursor)) {
                // Cbuf found in operand
                return found;
            }
        }
        return nullptr;
    }
    return nullptr;
}
| 55 | |||
| 56 | std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const BasicBlock& code, | ||
| 57 | s64 cursor) { | ||
| 58 | for (; cursor >= 0; --cursor) { | ||
| 59 | const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign); | ||
| 60 | if (!found_node) { | ||
| 61 | return {}; | ||
| 62 | } | ||
| 63 | const auto operation = std::get_if<OperationNode>(found_node); | ||
| 64 | ASSERT(operation); | ||
| 65 | |||
| 66 | const auto& target = (*operation)[0]; | ||
| 67 | if (const auto gpr_target = std::get_if<GprNode>(target)) { | ||
| 68 | if (gpr_target->GetIndex() == tracked->GetIndex()) { | ||
| 69 | return {(*operation)[1], new_cursor}; | ||
| 70 | } | ||
| 71 | } | ||
| 72 | } | ||
| 73 | return {}; | ||
| 74 | } | ||
| 75 | |||
| 76 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 1a344229f..2f6612a35 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp | |||
| @@ -50,6 +50,24 @@ bool SurfaceTargetIsLayered(SurfaceTarget target) { | |||
| 50 | } | 50 | } |
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | bool SurfaceTargetIsArray(SurfaceTarget target) { | ||
| 54 | switch (target) { | ||
| 55 | case SurfaceTarget::Texture1D: | ||
| 56 | case SurfaceTarget::Texture2D: | ||
| 57 | case SurfaceTarget::Texture3D: | ||
| 58 | case SurfaceTarget::TextureCubemap: | ||
| 59 | return false; | ||
| 60 | case SurfaceTarget::Texture1DArray: | ||
| 61 | case SurfaceTarget::Texture2DArray: | ||
| 62 | case SurfaceTarget::TextureCubeArray: | ||
| 63 | return true; | ||
| 64 | default: | ||
| 65 | LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target)); | ||
| 66 | UNREACHABLE(); | ||
| 67 | return false; | ||
| 68 | } | ||
| 69 | } | ||
| 70 | |||
| 53 | PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) { | 71 | PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) { |
| 54 | switch (format) { | 72 | switch (format) { |
| 55 | case Tegra::DepthFormat::S8_Z24_UNORM: | 73 | case Tegra::DepthFormat::S8_Z24_UNORM: |
diff --git a/src/video_core/surface.h b/src/video_core/surface.h index c2259c3c2..b783e4b27 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h | |||
| @@ -109,8 +109,7 @@ enum class SurfaceType { | |||
| 109 | ColorTexture = 0, | 109 | ColorTexture = 0, |
| 110 | Depth = 1, | 110 | Depth = 1, |
| 111 | DepthStencil = 2, | 111 | DepthStencil = 2, |
| 112 | Fill = 3, | 112 | Invalid = 3, |
| 113 | Invalid = 4, | ||
| 114 | }; | 113 | }; |
| 115 | 114 | ||
| 116 | enum class SurfaceTarget { | 115 | enum class SurfaceTarget { |
| @@ -441,6 +440,8 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t | |||
| 441 | 440 | ||
| 442 | bool SurfaceTargetIsLayered(SurfaceTarget target); | 441 | bool SurfaceTargetIsLayered(SurfaceTarget target); |
| 443 | 442 | ||
| 443 | bool SurfaceTargetIsArray(SurfaceTarget target); | ||
| 444 | |||
| 444 | PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format); | 445 | PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format); |
| 445 | 446 | ||
| 446 | PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format); | 447 | PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format); |
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index e7c78bee2..bdb40dacf 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h | |||
| @@ -182,7 +182,7 @@ struct TICEntry { | |||
| 182 | }; | 182 | }; |
| 183 | union { | 183 | union { |
| 184 | BitField<0, 16, u32> height_minus_1; | 184 | BitField<0, 16, u32> height_minus_1; |
| 185 | BitField<16, 15, u32> depth_minus_1; | 185 | BitField<16, 14, u32> depth_minus_1; |
| 186 | }; | 186 | }; |
| 187 | union { | 187 | union { |
| 188 | BitField<6, 13, u32> mip_lod_bias; | 188 | BitField<6, 13, u32> mip_lod_bias; |
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index f7de3471b..0b8ccdd44 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp | |||
| @@ -16,9 +16,10 @@ std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_wind | |||
| 16 | } | 16 | } |
| 17 | 17 | ||
| 18 | u16 GetResolutionScaleFactor(const RendererBase& renderer) { | 18 | u16 GetResolutionScaleFactor(const RendererBase& renderer) { |
| 19 | return !Settings::values.resolution_factor | 19 | return static_cast<u16>( |
| 20 | ? renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio() | 20 | Settings::values.resolution_factor |
| 21 | : Settings::values.resolution_factor; | 21 | ? Settings::values.resolution_factor |
| 22 | : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio()); | ||
| 22 | } | 23 | } |
| 23 | 24 | ||
| 24 | } // namespace VideoCore | 25 | } // namespace VideoCore |
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp index 0c0864742..f50225d5f 100644 --- a/src/yuzu/debugger/wait_tree.cpp +++ b/src/yuzu/debugger/wait_tree.cpp | |||
| @@ -13,7 +13,6 @@ | |||
| 13 | #include "core/hle/kernel/readable_event.h" | 13 | #include "core/hle/kernel/readable_event.h" |
| 14 | #include "core/hle/kernel/scheduler.h" | 14 | #include "core/hle/kernel/scheduler.h" |
| 15 | #include "core/hle/kernel/thread.h" | 15 | #include "core/hle/kernel/thread.h" |
| 16 | #include "core/hle/kernel/timer.h" | ||
| 17 | #include "core/hle/kernel/wait_object.h" | 16 | #include "core/hle/kernel/wait_object.h" |
| 18 | #include "core/memory.h" | 17 | #include "core/memory.h" |
| 19 | 18 | ||
| @@ -155,8 +154,6 @@ std::unique_ptr<WaitTreeWaitObject> WaitTreeWaitObject::make(const Kernel::WaitO | |||
| 155 | switch (object.GetHandleType()) { | 154 | switch (object.GetHandleType()) { |
| 156 | case Kernel::HandleType::ReadableEvent: | 155 | case Kernel::HandleType::ReadableEvent: |
| 157 | return std::make_unique<WaitTreeEvent>(static_cast<const Kernel::ReadableEvent&>(object)); | 156 | return std::make_unique<WaitTreeEvent>(static_cast<const Kernel::ReadableEvent&>(object)); |
| 158 | case Kernel::HandleType::Timer: | ||
| 159 | return std::make_unique<WaitTreeTimer>(static_cast<const Kernel::Timer&>(object)); | ||
| 160 | case Kernel::HandleType::Thread: | 157 | case Kernel::HandleType::Thread: |
| 161 | return std::make_unique<WaitTreeThread>(static_cast<const Kernel::Thread&>(object)); | 158 | return std::make_unique<WaitTreeThread>(static_cast<const Kernel::Thread&>(object)); |
| 162 | default: | 159 | default: |
| @@ -348,23 +345,6 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeEvent::GetChildren() const { | |||
| 348 | return list; | 345 | return list; |
| 349 | } | 346 | } |
| 350 | 347 | ||
| 351 | WaitTreeTimer::WaitTreeTimer(const Kernel::Timer& object) : WaitTreeWaitObject(object) {} | ||
| 352 | WaitTreeTimer::~WaitTreeTimer() = default; | ||
| 353 | |||
| 354 | std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeTimer::GetChildren() const { | ||
| 355 | std::vector<std::unique_ptr<WaitTreeItem>> list(WaitTreeWaitObject::GetChildren()); | ||
| 356 | |||
| 357 | const auto& timer = static_cast<const Kernel::Timer&>(object); | ||
| 358 | |||
| 359 | list.push_back(std::make_unique<WaitTreeText>( | ||
| 360 | tr("reset type = %1").arg(GetResetTypeQString(timer.GetResetType())))); | ||
| 361 | list.push_back( | ||
| 362 | std::make_unique<WaitTreeText>(tr("initial delay = %1").arg(timer.GetInitialDelay()))); | ||
| 363 | list.push_back( | ||
| 364 | std::make_unique<WaitTreeText>(tr("interval delay = %1").arg(timer.GetIntervalDelay()))); | ||
| 365 | return list; | ||
| 366 | } | ||
| 367 | |||
| 368 | WaitTreeThreadList::WaitTreeThreadList(const std::vector<Kernel::SharedPtr<Kernel::Thread>>& list) | 348 | WaitTreeThreadList::WaitTreeThreadList(const std::vector<Kernel::SharedPtr<Kernel::Thread>>& list) |
| 369 | : thread_list(list) {} | 349 | : thread_list(list) {} |
| 370 | WaitTreeThreadList::~WaitTreeThreadList() = default; | 350 | WaitTreeThreadList::~WaitTreeThreadList() = default; |
diff --git a/src/yuzu/debugger/wait_tree.h b/src/yuzu/debugger/wait_tree.h index e639ef412..365c3dbfe 100644 --- a/src/yuzu/debugger/wait_tree.h +++ b/src/yuzu/debugger/wait_tree.h | |||
| @@ -20,7 +20,6 @@ namespace Kernel { | |||
| 20 | class ReadableEvent; | 20 | class ReadableEvent; |
| 21 | class WaitObject; | 21 | class WaitObject; |
| 22 | class Thread; | 22 | class Thread; |
| 23 | class Timer; | ||
| 24 | } // namespace Kernel | 23 | } // namespace Kernel |
| 25 | 24 | ||
| 26 | class WaitTreeThread; | 25 | class WaitTreeThread; |
| @@ -150,15 +149,6 @@ public: | |||
| 150 | std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override; | 149 | std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override; |
| 151 | }; | 150 | }; |
| 152 | 151 | ||
| 153 | class WaitTreeTimer : public WaitTreeWaitObject { | ||
| 154 | Q_OBJECT | ||
| 155 | public: | ||
| 156 | explicit WaitTreeTimer(const Kernel::Timer& object); | ||
| 157 | ~WaitTreeTimer() override; | ||
| 158 | |||
| 159 | std::vector<std::unique_ptr<WaitTreeItem>> GetChildren() const override; | ||
| 160 | }; | ||
| 161 | |||
| 162 | class WaitTreeThreadList : public WaitTreeExpandableItem { | 152 | class WaitTreeThreadList : public WaitTreeExpandableItem { |
| 163 | Q_OBJECT | 153 | Q_OBJECT |
| 164 | public: | 154 | public: |
diff --git a/src/yuzu/loading_screen.ui b/src/yuzu/loading_screen.ui index a67d273fd..820b47536 100644 --- a/src/yuzu/loading_screen.ui +++ b/src/yuzu/loading_screen.ui | |||
| @@ -132,7 +132,7 @@ border-radius: 15px; | |||
| 132 | font: 75 15pt "Arial";</string> | 132 | font: 75 15pt "Arial";</string> |
| 133 | </property> | 133 | </property> |
| 134 | <property name="text"> | 134 | <property name="text"> |
| 135 | <string>Stage 1 of 2. Estimate Time 5m 4s</string> | 135 | <string>Estimated Time 5m 4s</string> |
| 136 | </property> | 136 | </property> |
| 137 | </widget> | 137 | </widget> |
| 138 | </item> | 138 | </item> |
| @@ -146,6 +146,9 @@ font: 75 15pt "Arial";</string> | |||
| 146 | <property name="text"> | 146 | <property name="text"> |
| 147 | <string/> | 147 | <string/> |
| 148 | </property> | 148 | </property> |
| 149 | <property name="alignment"> | ||
| 150 | <set>Qt::AlignCenter</set> | ||
| 151 | </property> | ||
| 149 | <property name="margin"> | 152 | <property name="margin"> |
| 150 | <number>30</number> | 153 | <number>30</number> |
| 151 | </property> | 154 | </property> |
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index ab403b3ac..485e29de2 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp | |||
| @@ -1682,12 +1682,16 @@ void GMainWindow::OnToggleFilterBar() { | |||
| 1682 | 1682 | ||
| 1683 | void GMainWindow::OnCaptureScreenshot() { | 1683 | void GMainWindow::OnCaptureScreenshot() { |
| 1684 | OnPauseGame(); | 1684 | OnPauseGame(); |
| 1685 | const QString path = | 1685 | QFileDialog png_dialog(this, tr("Capture Screenshot"), UISettings::values.screenshot_path, |
| 1686 | QFileDialog::getSaveFileName(this, tr("Capture Screenshot"), | 1686 | tr("PNG Image (*.png)")); |
| 1687 | UISettings::values.screenshot_path, tr("PNG Image (*.png)")); | 1687 | png_dialog.setAcceptMode(QFileDialog::AcceptSave); |
| 1688 | if (!path.isEmpty()) { | 1688 | png_dialog.setDefaultSuffix("png"); |
| 1689 | UISettings::values.screenshot_path = QFileInfo(path).path(); | 1689 | if (png_dialog.exec()) { |
| 1690 | render_window->CaptureScreenshot(UISettings::values.screenshot_resolution_factor, path); | 1690 | const QString path = png_dialog.selectedFiles().first(); |
| 1691 | if (!path.isEmpty()) { | ||
| 1692 | UISettings::values.screenshot_path = QFileInfo(path).path(); | ||
| 1693 | render_window->CaptureScreenshot(UISettings::values.screenshot_resolution_factor, path); | ||
| 1694 | } | ||
| 1691 | } | 1695 | } |
| 1692 | OnStartGame(); | 1696 | OnStartGame(); |
| 1693 | } | 1697 | } |