diff options
Diffstat (limited to 'src')
75 files changed, 896 insertions, 455 deletions
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index 4dfd41b43..978b1518f 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h | |||
| @@ -7,6 +7,10 @@ | |||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 9 | 9 | ||
| 10 | namespace Common { | ||
| 11 | struct PageTable; | ||
| 12 | } | ||
| 13 | |||
| 10 | namespace Kernel { | 14 | namespace Kernel { |
| 11 | enum class VMAPermission : u8; | 15 | enum class VMAPermission : u8; |
| 12 | } | 16 | } |
| @@ -49,8 +53,14 @@ public: | |||
| 49 | /// Clear all instruction cache | 53 | /// Clear all instruction cache |
| 50 | virtual void ClearInstructionCache() = 0; | 54 | virtual void ClearInstructionCache() = 0; |
| 51 | 55 | ||
| 52 | /// Notify CPU emulation that page tables have changed | 56 | /// Notifies CPU emulation that the current page table has changed. |
| 53 | virtual void PageTableChanged() = 0; | 57 | /// |
| 58 | /// @param new_page_table The new page table. | ||
| 59 | /// @param new_address_space_size_in_bits The new usable size of the address space in bits. | ||
| 60 | /// This can be either 32, 36, or 39 on official software. | ||
| 61 | /// | ||
| 62 | virtual void PageTableChanged(Common::PageTable& new_page_table, | ||
| 63 | std::size_t new_address_space_size_in_bits) = 0; | ||
| 54 | 64 | ||
| 55 | /** | 65 | /** |
| 56 | * Set the Program Counter to an address | 66 | * Set the Program Counter to an address |
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index dc96e35d5..44307fa19 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp | |||
| @@ -14,7 +14,6 @@ | |||
| 14 | #include "core/core_timing.h" | 14 | #include "core/core_timing.h" |
| 15 | #include "core/core_timing_util.h" | 15 | #include "core/core_timing_util.h" |
| 16 | #include "core/gdbstub/gdbstub.h" | 16 | #include "core/gdbstub/gdbstub.h" |
| 17 | #include "core/hle/kernel/kernel.h" | ||
| 18 | #include "core/hle/kernel/process.h" | 17 | #include "core/hle/kernel/process.h" |
| 19 | #include "core/hle/kernel/svc.h" | 18 | #include "core/hle/kernel/svc.h" |
| 20 | #include "core/hle/kernel/vm_manager.h" | 19 | #include "core/hle/kernel/vm_manager.h" |
| @@ -129,18 +128,16 @@ public: | |||
| 129 | u64 tpidr_el0 = 0; | 128 | u64 tpidr_el0 = 0; |
| 130 | }; | 129 | }; |
| 131 | 130 | ||
| 132 | std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const { | 131 | std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& page_table, |
| 133 | auto* current_process = system.Kernel().CurrentProcess(); | 132 | std::size_t address_space_bits) const { |
| 134 | auto** const page_table = current_process->VMManager().page_table.pointers.data(); | ||
| 135 | |||
| 136 | Dynarmic::A64::UserConfig config; | 133 | Dynarmic::A64::UserConfig config; |
| 137 | 134 | ||
| 138 | // Callbacks | 135 | // Callbacks |
| 139 | config.callbacks = cb.get(); | 136 | config.callbacks = cb.get(); |
| 140 | 137 | ||
| 141 | // Memory | 138 | // Memory |
| 142 | config.page_table = reinterpret_cast<void**>(page_table); | 139 | config.page_table = reinterpret_cast<void**>(page_table.pointers.data()); |
| 143 | config.page_table_address_space_bits = current_process->VMManager().GetAddressSpaceWidth(); | 140 | config.page_table_address_space_bits = address_space_bits; |
| 144 | config.silently_mirror_page_table = false; | 141 | config.silently_mirror_page_table = false; |
| 145 | 142 | ||
| 146 | // Multi-process state | 143 | // Multi-process state |
| @@ -176,12 +173,7 @@ ARM_Dynarmic::ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, | |||
| 176 | std::size_t core_index) | 173 | std::size_t core_index) |
| 177 | : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{system}, | 174 | : cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{system}, |
| 178 | core_index{core_index}, system{system}, | 175 | core_index{core_index}, system{system}, |
| 179 | exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} { | 176 | exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {} |
| 180 | ThreadContext ctx{}; | ||
| 181 | inner_unicorn.SaveContext(ctx); | ||
| 182 | PageTableChanged(); | ||
| 183 | LoadContext(ctx); | ||
| 184 | } | ||
| 185 | 177 | ||
| 186 | ARM_Dynarmic::~ARM_Dynarmic() = default; | 178 | ARM_Dynarmic::~ARM_Dynarmic() = default; |
| 187 | 179 | ||
| @@ -276,8 +268,9 @@ void ARM_Dynarmic::ClearExclusiveState() { | |||
| 276 | jit->ClearExclusiveState(); | 268 | jit->ClearExclusiveState(); |
| 277 | } | 269 | } |
| 278 | 270 | ||
| 279 | void ARM_Dynarmic::PageTableChanged() { | 271 | void ARM_Dynarmic::PageTableChanged(Common::PageTable& page_table, |
| 280 | jit = MakeJit(); | 272 | std::size_t new_address_space_size_in_bits) { |
| 273 | jit = MakeJit(page_table, new_address_space_size_in_bits); | ||
| 281 | } | 274 | } |
| 282 | 275 | ||
| 283 | DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(std::size_t core_count) : monitor(core_count) {} | 276 | DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(std::size_t core_count) : monitor(core_count) {} |
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h index c1db254e8..b701e97a3 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.h +++ b/src/core/arm/dynarmic/arm_dynarmic.h | |||
| @@ -48,10 +48,12 @@ public: | |||
| 48 | void ClearExclusiveState() override; | 48 | void ClearExclusiveState() override; |
| 49 | 49 | ||
| 50 | void ClearInstructionCache() override; | 50 | void ClearInstructionCache() override; |
| 51 | void PageTableChanged() override; | 51 | void PageTableChanged(Common::PageTable& new_page_table, |
| 52 | std::size_t new_address_space_size_in_bits) override; | ||
| 52 | 53 | ||
| 53 | private: | 54 | private: |
| 54 | std::unique_ptr<Dynarmic::A64::Jit> MakeJit() const; | 55 | std::unique_ptr<Dynarmic::A64::Jit> MakeJit(Common::PageTable& page_table, |
| 56 | std::size_t address_space_bits) const; | ||
| 55 | 57 | ||
| 56 | friend class ARM_Dynarmic_Callbacks; | 58 | friend class ARM_Dynarmic_Callbacks; |
| 57 | std::unique_ptr<ARM_Dynarmic_Callbacks> cb; | 59 | std::unique_ptr<ARM_Dynarmic_Callbacks> cb; |
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h index 209fc16ad..34e974b4d 100644 --- a/src/core/arm/unicorn/arm_unicorn.h +++ b/src/core/arm/unicorn/arm_unicorn.h | |||
| @@ -41,7 +41,7 @@ public: | |||
| 41 | void Run() override; | 41 | void Run() override; |
| 42 | void Step() override; | 42 | void Step() override; |
| 43 | void ClearInstructionCache() override; | 43 | void ClearInstructionCache() override; |
| 44 | void PageTableChanged() override{}; | 44 | void PageTableChanged(Common::PageTable&, std::size_t) override {} |
| 45 | void RecordBreak(GDBStub::BreakpointAddress bkpt); | 45 | void RecordBreak(GDBStub::BreakpointAddress bkpt); |
| 46 | 46 | ||
| 47 | private: | 47 | private: |
diff --git a/src/core/core.cpp b/src/core/core.cpp index bc9e887b6..175a5f2ea 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp | |||
| @@ -3,9 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <array> | 5 | #include <array> |
| 6 | #include <map> | ||
| 7 | #include <memory> | 6 | #include <memory> |
| 8 | #include <thread> | ||
| 9 | #include <utility> | 7 | #include <utility> |
| 10 | 8 | ||
| 11 | #include "common/file_util.h" | 9 | #include "common/file_util.h" |
| @@ -38,8 +36,6 @@ | |||
| 38 | #include "frontend/applets/software_keyboard.h" | 36 | #include "frontend/applets/software_keyboard.h" |
| 39 | #include "frontend/applets/web_browser.h" | 37 | #include "frontend/applets/web_browser.h" |
| 40 | #include "video_core/debug_utils/debug_utils.h" | 38 | #include "video_core/debug_utils/debug_utils.h" |
| 41 | #include "video_core/gpu_asynch.h" | ||
| 42 | #include "video_core/gpu_synch.h" | ||
| 43 | #include "video_core/renderer_base.h" | 39 | #include "video_core/renderer_base.h" |
| 44 | #include "video_core/video_core.h" | 40 | #include "video_core/video_core.h" |
| 45 | 41 | ||
| @@ -81,7 +77,7 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs, | |||
| 81 | return vfs->OpenFile(path, FileSys::Mode::Read); | 77 | return vfs->OpenFile(path, FileSys::Mode::Read); |
| 82 | } | 78 | } |
| 83 | struct System::Impl { | 79 | struct System::Impl { |
| 84 | explicit Impl(System& system) : kernel{system} {} | 80 | explicit Impl(System& system) : kernel{system}, cpu_core_manager{system} {} |
| 85 | 81 | ||
| 86 | Cpu& CurrentCpuCore() { | 82 | Cpu& CurrentCpuCore() { |
| 87 | return cpu_core_manager.GetCurrentCore(); | 83 | return cpu_core_manager.GetCurrentCore(); |
| @@ -99,6 +95,7 @@ struct System::Impl { | |||
| 99 | LOG_DEBUG(HW_Memory, "initialized OK"); | 95 | LOG_DEBUG(HW_Memory, "initialized OK"); |
| 100 | 96 | ||
| 101 | core_timing.Initialize(); | 97 | core_timing.Initialize(); |
| 98 | cpu_core_manager.Initialize(); | ||
| 102 | kernel.Initialize(); | 99 | kernel.Initialize(); |
| 103 | 100 | ||
| 104 | const auto current_time = std::chrono::duration_cast<std::chrono::seconds>( | 101 | const auto current_time = std::chrono::duration_cast<std::chrono::seconds>( |
| @@ -120,9 +117,6 @@ struct System::Impl { | |||
| 120 | if (web_browser == nullptr) | 117 | if (web_browser == nullptr) |
| 121 | web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>(); | 118 | web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>(); |
| 122 | 119 | ||
| 123 | auto main_process = Kernel::Process::Create(system, "main"); | ||
| 124 | kernel.MakeCurrentProcess(main_process.get()); | ||
| 125 | |||
| 126 | telemetry_session = std::make_unique<Core::TelemetrySession>(); | 120 | telemetry_session = std::make_unique<Core::TelemetrySession>(); |
| 127 | service_manager = std::make_shared<Service::SM::ServiceManager>(); | 121 | service_manager = std::make_shared<Service::SM::ServiceManager>(); |
| 128 | 122 | ||
| @@ -134,15 +128,9 @@ struct System::Impl { | |||
| 134 | return ResultStatus::ErrorVideoCore; | 128 | return ResultStatus::ErrorVideoCore; |
| 135 | } | 129 | } |
| 136 | 130 | ||
| 137 | is_powered_on = true; | 131 | gpu_core = VideoCore::CreateGPU(system); |
| 138 | |||
| 139 | if (Settings::values.use_asynchronous_gpu_emulation) { | ||
| 140 | gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer); | ||
| 141 | } else { | ||
| 142 | gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer); | ||
| 143 | } | ||
| 144 | 132 | ||
| 145 | cpu_core_manager.Initialize(system); | 133 | is_powered_on = true; |
| 146 | 134 | ||
| 147 | LOG_DEBUG(Core, "Initialized OK"); | 135 | LOG_DEBUG(Core, "Initialized OK"); |
| 148 | 136 | ||
| @@ -179,7 +167,8 @@ struct System::Impl { | |||
| 179 | return init_result; | 167 | return init_result; |
| 180 | } | 168 | } |
| 181 | 169 | ||
| 182 | const Loader::ResultStatus load_result{app_loader->Load(*kernel.CurrentProcess())}; | 170 | auto main_process = Kernel::Process::Create(system, "main"); |
| 171 | const auto [load_result, load_parameters] = app_loader->Load(*main_process); | ||
| 183 | if (load_result != Loader::ResultStatus::Success) { | 172 | if (load_result != Loader::ResultStatus::Success) { |
| 184 | LOG_CRITICAL(Core, "Failed to load ROM (Error {})!", static_cast<int>(load_result)); | 173 | LOG_CRITICAL(Core, "Failed to load ROM (Error {})!", static_cast<int>(load_result)); |
| 185 | Shutdown(); | 174 | Shutdown(); |
| @@ -187,6 +176,16 @@ struct System::Impl { | |||
| 187 | return static_cast<ResultStatus>(static_cast<u32>(ResultStatus::ErrorLoader) + | 176 | return static_cast<ResultStatus>(static_cast<u32>(ResultStatus::ErrorLoader) + |
| 188 | static_cast<u32>(load_result)); | 177 | static_cast<u32>(load_result)); |
| 189 | } | 178 | } |
| 179 | kernel.MakeCurrentProcess(main_process.get()); | ||
| 180 | |||
| 181 | // Main process has been loaded and been made current. | ||
| 182 | // Begin GPU and CPU execution. | ||
| 183 | gpu_core->Start(); | ||
| 184 | cpu_core_manager.StartThreads(); | ||
| 185 | |||
| 186 | // All threads are started, begin main process execution, now that we're in the clear. | ||
| 187 | main_process->Run(load_parameters->main_thread_priority, | ||
| 188 | load_parameters->main_thread_stack_size); | ||
| 190 | 189 | ||
| 191 | status = ResultStatus::Success; | 190 | status = ResultStatus::Success; |
| 192 | return status; | 191 | return status; |
diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp index 93bc5619c..8fcb4eeb1 100644 --- a/src/core/cpu_core_manager.cpp +++ b/src/core/cpu_core_manager.cpp | |||
| @@ -19,17 +19,19 @@ void RunCpuCore(const System& system, Cpu& cpu_state) { | |||
| 19 | } | 19 | } |
| 20 | } // Anonymous namespace | 20 | } // Anonymous namespace |
| 21 | 21 | ||
| 22 | CpuCoreManager::CpuCoreManager() = default; | 22 | CpuCoreManager::CpuCoreManager(System& system) : system{system} {} |
| 23 | CpuCoreManager::~CpuCoreManager() = default; | 23 | CpuCoreManager::~CpuCoreManager() = default; |
| 24 | 24 | ||
| 25 | void CpuCoreManager::Initialize(System& system) { | 25 | void CpuCoreManager::Initialize() { |
| 26 | barrier = std::make_unique<CpuBarrier>(); | 26 | barrier = std::make_unique<CpuBarrier>(); |
| 27 | exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size()); | 27 | exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size()); |
| 28 | 28 | ||
| 29 | for (std::size_t index = 0; index < cores.size(); ++index) { | 29 | for (std::size_t index = 0; index < cores.size(); ++index) { |
| 30 | cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index); | 30 | cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index); |
| 31 | } | 31 | } |
| 32 | } | ||
| 32 | 33 | ||
| 34 | void CpuCoreManager::StartThreads() { | ||
| 33 | // Create threads for CPU cores 1-3, and build thread_to_cpu map | 35 | // Create threads for CPU cores 1-3, and build thread_to_cpu map |
| 34 | // CPU core 0 is run on the main thread | 36 | // CPU core 0 is run on the main thread |
| 35 | thread_to_cpu[std::this_thread::get_id()] = cores[0].get(); | 37 | thread_to_cpu[std::this_thread::get_id()] = cores[0].get(); |
diff --git a/src/core/cpu_core_manager.h b/src/core/cpu_core_manager.h index a4d70ec56..2cbbf8216 100644 --- a/src/core/cpu_core_manager.h +++ b/src/core/cpu_core_manager.h | |||
| @@ -18,7 +18,7 @@ class System; | |||
| 18 | 18 | ||
| 19 | class CpuCoreManager { | 19 | class CpuCoreManager { |
| 20 | public: | 20 | public: |
| 21 | CpuCoreManager(); | 21 | explicit CpuCoreManager(System& system); |
| 22 | CpuCoreManager(const CpuCoreManager&) = delete; | 22 | CpuCoreManager(const CpuCoreManager&) = delete; |
| 23 | CpuCoreManager(CpuCoreManager&&) = delete; | 23 | CpuCoreManager(CpuCoreManager&&) = delete; |
| 24 | 24 | ||
| @@ -27,7 +27,8 @@ public: | |||
| 27 | CpuCoreManager& operator=(const CpuCoreManager&) = delete; | 27 | CpuCoreManager& operator=(const CpuCoreManager&) = delete; |
| 28 | CpuCoreManager& operator=(CpuCoreManager&&) = delete; | 28 | CpuCoreManager& operator=(CpuCoreManager&&) = delete; |
| 29 | 29 | ||
| 30 | void Initialize(System& system); | 30 | void Initialize(); |
| 31 | void StartThreads(); | ||
| 31 | void Shutdown(); | 32 | void Shutdown(); |
| 32 | 33 | ||
| 33 | Cpu& GetCore(std::size_t index); | 34 | Cpu& GetCore(std::size_t index); |
| @@ -54,6 +55,8 @@ private: | |||
| 54 | 55 | ||
| 55 | /// Map of guest threads to CPU cores | 56 | /// Map of guest threads to CPU cores |
| 56 | std::map<std::thread::id, Cpu*> thread_to_cpu; | 57 | std::map<std::thread::id, Cpu*> thread_to_cpu; |
| 58 | |||
| 59 | System& system; | ||
| 57 | }; | 60 | }; |
| 58 | 61 | ||
| 59 | } // namespace Core | 62 | } // namespace Core |
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 4d58e7c69..8539fabe4 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp | |||
| @@ -182,7 +182,12 @@ void KernelCore::AppendNewProcess(SharedPtr<Process> process) { | |||
| 182 | 182 | ||
| 183 | void KernelCore::MakeCurrentProcess(Process* process) { | 183 | void KernelCore::MakeCurrentProcess(Process* process) { |
| 184 | impl->current_process = process; | 184 | impl->current_process = process; |
| 185 | Memory::SetCurrentPageTable(&process->VMManager().page_table); | 185 | |
| 186 | if (process == nullptr) { | ||
| 187 | return; | ||
| 188 | } | ||
| 189 | |||
| 190 | Memory::SetCurrentPageTable(*process); | ||
| 186 | } | 191 | } |
| 187 | 192 | ||
| 188 | Process* KernelCore::CurrentProcess() { | 193 | Process* KernelCore::CurrentProcess() { |
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index 8b2b3877d..6d7a7e754 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp | |||
| @@ -28,12 +28,12 @@ namespace { | |||
| 28 | * | 28 | * |
| 29 | * @param owner_process The parent process for the main thread | 29 | * @param owner_process The parent process for the main thread |
| 30 | * @param kernel The kernel instance to create the main thread under. | 30 | * @param kernel The kernel instance to create the main thread under. |
| 31 | * @param entry_point The address at which the thread should start execution | ||
| 32 | * @param priority The priority to give the main thread | 31 | * @param priority The priority to give the main thread |
| 33 | */ | 32 | */ |
| 34 | void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) { | 33 | void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority) { |
| 35 | // Initialize new "main" thread | 34 | const auto& vm_manager = owner_process.VMManager(); |
| 36 | const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress(); | 35 | const VAddr entry_point = vm_manager.GetCodeRegionBaseAddress(); |
| 36 | const VAddr stack_top = vm_manager.GetTLSIORegionEndAddress(); | ||
| 37 | auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0, | 37 | auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0, |
| 38 | owner_process.GetIdealCore(), stack_top, owner_process); | 38 | owner_process.GetIdealCore(), stack_top, owner_process); |
| 39 | 39 | ||
| @@ -105,8 +105,6 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) { | |||
| 105 | is_64bit_process = metadata.Is64BitProgram(); | 105 | is_64bit_process = metadata.Is64BitProgram(); |
| 106 | 106 | ||
| 107 | vm_manager.Reset(metadata.GetAddressSpaceType()); | 107 | vm_manager.Reset(metadata.GetAddressSpaceType()); |
| 108 | // Ensure that the potentially resized page table is seen by CPU backends. | ||
| 109 | Memory::SetCurrentPageTable(&vm_manager.page_table); | ||
| 110 | 108 | ||
| 111 | const auto& caps = metadata.GetKernelCapabilities(); | 109 | const auto& caps = metadata.GetKernelCapabilities(); |
| 112 | const auto capability_init_result = | 110 | const auto capability_init_result = |
| @@ -118,7 +116,7 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) { | |||
| 118 | return handle_table.SetSize(capabilities.GetHandleTableSize()); | 116 | return handle_table.SetSize(capabilities.GetHandleTableSize()); |
| 119 | } | 117 | } |
| 120 | 118 | ||
| 121 | void Process::Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size) { | 119 | void Process::Run(s32 main_thread_priority, u64 stack_size) { |
| 122 | // The kernel always ensures that the given stack size is page aligned. | 120 | // The kernel always ensures that the given stack size is page aligned. |
| 123 | main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE); | 121 | main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE); |
| 124 | 122 | ||
| @@ -134,7 +132,7 @@ void Process::Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size) { | |||
| 134 | vm_manager.LogLayout(); | 132 | vm_manager.LogLayout(); |
| 135 | ChangeStatus(ProcessStatus::Running); | 133 | ChangeStatus(ProcessStatus::Running); |
| 136 | 134 | ||
| 137 | SetupMainThread(*this, kernel, entry_point, main_thread_priority); | 135 | SetupMainThread(*this, kernel, main_thread_priority); |
| 138 | } | 136 | } |
| 139 | 137 | ||
| 140 | void Process::PrepareForTermination() { | 138 | void Process::PrepareForTermination() { |
| @@ -241,9 +239,6 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) { | |||
| 241 | MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData); | 239 | MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData); |
| 242 | 240 | ||
| 243 | code_memory_size += module_.memory.size(); | 241 | code_memory_size += module_.memory.size(); |
| 244 | |||
| 245 | // Clear instruction cache in CPU JIT | ||
| 246 | system.InvalidateCpuInstructionCaches(); | ||
| 247 | } | 242 | } |
| 248 | 243 | ||
| 249 | Process::Process(Core::System& system) | 244 | Process::Process(Core::System& system) |
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index dda52f4c0..bf3b7eef3 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h | |||
| @@ -225,9 +225,12 @@ public: | |||
| 225 | ResultCode LoadFromMetadata(const FileSys::ProgramMetadata& metadata); | 225 | ResultCode LoadFromMetadata(const FileSys::ProgramMetadata& metadata); |
| 226 | 226 | ||
| 227 | /** | 227 | /** |
| 228 | * Applies address space changes and launches the process main thread. | 228 | * Starts the main application thread for this process. |
| 229 | * | ||
| 230 | * @param main_thread_priority The priority for the main thread. | ||
| 231 | * @param stack_size The stack size for the main thread in bytes. | ||
| 229 | */ | 232 | */ |
| 230 | void Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size); | 233 | void Run(s32 main_thread_priority, u64 stack_size); |
| 231 | 234 | ||
| 232 | /** | 235 | /** |
| 233 | * Prepares a process for termination by stopping all of its threads | 236 | * Prepares a process for termination by stopping all of its threads |
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 4eeb97bef..4c763b288 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp | |||
| @@ -2290,7 +2290,7 @@ static const FunctionDef SVC_Table[] = { | |||
| 2290 | {0x33, SvcWrap<GetThreadContext>, "GetThreadContext"}, | 2290 | {0x33, SvcWrap<GetThreadContext>, "GetThreadContext"}, |
| 2291 | {0x34, SvcWrap<WaitForAddress>, "WaitForAddress"}, | 2291 | {0x34, SvcWrap<WaitForAddress>, "WaitForAddress"}, |
| 2292 | {0x35, SvcWrap<SignalToAddress>, "SignalToAddress"}, | 2292 | {0x35, SvcWrap<SignalToAddress>, "SignalToAddress"}, |
| 2293 | {0x36, nullptr, "Unknown"}, | 2293 | {0x36, nullptr, "SynchronizePreemptionState"}, |
| 2294 | {0x37, nullptr, "Unknown"}, | 2294 | {0x37, nullptr, "Unknown"}, |
| 2295 | {0x38, nullptr, "Unknown"}, | 2295 | {0x38, nullptr, "Unknown"}, |
| 2296 | {0x39, nullptr, "Unknown"}, | 2296 | {0x39, nullptr, "Unknown"}, |
diff --git a/src/core/hle/kernel/wait_object.cpp b/src/core/hle/kernel/wait_object.cpp index 90580ed93..c8eaf9488 100644 --- a/src/core/hle/kernel/wait_object.cpp +++ b/src/core/hle/kernel/wait_object.cpp | |||
| @@ -30,7 +30,7 @@ void WaitObject::RemoveWaitingThread(Thread* thread) { | |||
| 30 | waiting_threads.erase(itr); | 30 | waiting_threads.erase(itr); |
| 31 | } | 31 | } |
| 32 | 32 | ||
| 33 | SharedPtr<Thread> WaitObject::GetHighestPriorityReadyThread() { | 33 | SharedPtr<Thread> WaitObject::GetHighestPriorityReadyThread() const { |
| 34 | Thread* candidate = nullptr; | 34 | Thread* candidate = nullptr; |
| 35 | u32 candidate_priority = THREADPRIO_LOWEST + 1; | 35 | u32 candidate_priority = THREADPRIO_LOWEST + 1; |
| 36 | 36 | ||
diff --git a/src/core/hle/kernel/wait_object.h b/src/core/hle/kernel/wait_object.h index 04464a51a..3271a30a7 100644 --- a/src/core/hle/kernel/wait_object.h +++ b/src/core/hle/kernel/wait_object.h | |||
| @@ -54,7 +54,7 @@ public: | |||
| 54 | void WakeupWaitingThread(SharedPtr<Thread> thread); | 54 | void WakeupWaitingThread(SharedPtr<Thread> thread); |
| 55 | 55 | ||
| 56 | /// Obtains the highest priority thread that is ready to run from this object's waiting list. | 56 | /// Obtains the highest priority thread that is ready to run from this object's waiting list. |
| 57 | SharedPtr<Thread> GetHighestPriorityReadyThread(); | 57 | SharedPtr<Thread> GetHighestPriorityReadyThread() const; |
| 58 | 58 | ||
| 59 | /// Get a const reference to the waiting threads list for debug use | 59 | /// Get a const reference to the waiting threads list for debug use |
| 60 | const std::vector<SharedPtr<Thread>>& GetWaitingThreads() const; | 60 | const std::vector<SharedPtr<Thread>>& GetWaitingThreads() const; |
diff --git a/src/core/hle/service/audio/audctl.cpp b/src/core/hle/service/audio/audctl.cpp index b6b71f966..f43e512e9 100644 --- a/src/core/hle/service/audio/audctl.cpp +++ b/src/core/hle/service/audio/audctl.cpp | |||
| @@ -2,6 +2,8 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/logging/log.h" | ||
| 6 | #include "core/hle/ipc_helpers.h" | ||
| 5 | #include "core/hle/service/audio/audctl.h" | 7 | #include "core/hle/service/audio/audctl.h" |
| 6 | 8 | ||
| 7 | namespace Service::Audio { | 9 | namespace Service::Audio { |
| @@ -11,8 +13,8 @@ AudCtl::AudCtl() : ServiceFramework{"audctl"} { | |||
| 11 | static const FunctionInfo functions[] = { | 13 | static const FunctionInfo functions[] = { |
| 12 | {0, nullptr, "GetTargetVolume"}, | 14 | {0, nullptr, "GetTargetVolume"}, |
| 13 | {1, nullptr, "SetTargetVolume"}, | 15 | {1, nullptr, "SetTargetVolume"}, |
| 14 | {2, nullptr, "GetTargetVolumeMin"}, | 16 | {2, &AudCtl::GetTargetVolumeMin, "GetTargetVolumeMin"}, |
| 15 | {3, nullptr, "GetTargetVolumeMax"}, | 17 | {3, &AudCtl::GetTargetVolumeMax, "GetTargetVolumeMax"}, |
| 16 | {4, nullptr, "IsTargetMute"}, | 18 | {4, nullptr, "IsTargetMute"}, |
| 17 | {5, nullptr, "SetTargetMute"}, | 19 | {5, nullptr, "SetTargetMute"}, |
| 18 | {6, nullptr, "IsTargetConnected"}, | 20 | {6, nullptr, "IsTargetConnected"}, |
| @@ -44,4 +46,28 @@ AudCtl::AudCtl() : ServiceFramework{"audctl"} { | |||
| 44 | 46 | ||
| 45 | AudCtl::~AudCtl() = default; | 47 | AudCtl::~AudCtl() = default; |
| 46 | 48 | ||
| 49 | void AudCtl::GetTargetVolumeMin(Kernel::HLERequestContext& ctx) { | ||
| 50 | LOG_DEBUG(Audio, "called."); | ||
| 51 | |||
| 52 | // This service function is currently hardcoded on the | ||
| 53 | // actual console to this value (as of 6.0.0). | ||
| 54 | constexpr s32 target_min_volume = 0; | ||
| 55 | |||
| 56 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 57 | rb.Push(RESULT_SUCCESS); | ||
| 58 | rb.Push(target_min_volume); | ||
| 59 | } | ||
| 60 | |||
| 61 | void AudCtl::GetTargetVolumeMax(Kernel::HLERequestContext& ctx) { | ||
| 62 | LOG_DEBUG(Audio, "called."); | ||
| 63 | |||
| 64 | // This service function is currently hardcoded on the | ||
| 65 | // actual console to this value (as of 6.0.0). | ||
| 66 | constexpr s32 target_max_volume = 15; | ||
| 67 | |||
| 68 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 69 | rb.Push(RESULT_SUCCESS); | ||
| 70 | rb.Push(target_max_volume); | ||
| 71 | } | ||
| 72 | |||
| 47 | } // namespace Service::Audio | 73 | } // namespace Service::Audio |
diff --git a/src/core/hle/service/audio/audctl.h b/src/core/hle/service/audio/audctl.h index 9d2d9e83b..c7fafc02e 100644 --- a/src/core/hle/service/audio/audctl.h +++ b/src/core/hle/service/audio/audctl.h | |||
| @@ -12,6 +12,10 @@ class AudCtl final : public ServiceFramework<AudCtl> { | |||
| 12 | public: | 12 | public: |
| 13 | explicit AudCtl(); | 13 | explicit AudCtl(); |
| 14 | ~AudCtl() override; | 14 | ~AudCtl() override; |
| 15 | |||
| 16 | private: | ||
| 17 | void GetTargetVolumeMin(Kernel::HLERequestContext& ctx); | ||
| 18 | void GetTargetVolumeMax(Kernel::HLERequestContext& ctx); | ||
| 15 | }; | 19 | }; |
| 16 | 20 | ||
| 17 | } // namespace Service::Audio | 21 | } // namespace Service::Audio |
diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp index 07aa7a1cd..10b13fb1d 100644 --- a/src/core/loader/deconstructed_rom_directory.cpp +++ b/src/core/loader/deconstructed_rom_directory.cpp | |||
| @@ -86,25 +86,29 @@ FileType AppLoader_DeconstructedRomDirectory::IdentifyType(const FileSys::Virtua | |||
| 86 | return FileType::Error; | 86 | return FileType::Error; |
| 87 | } | 87 | } |
| 88 | 88 | ||
| 89 | ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process) { | 89 | AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirectory::Load( |
| 90 | Kernel::Process& process) { | ||
| 90 | if (is_loaded) { | 91 | if (is_loaded) { |
| 91 | return ResultStatus::ErrorAlreadyLoaded; | 92 | return {ResultStatus::ErrorAlreadyLoaded, {}}; |
| 92 | } | 93 | } |
| 93 | 94 | ||
| 94 | if (dir == nullptr) { | 95 | if (dir == nullptr) { |
| 95 | if (file == nullptr) | 96 | if (file == nullptr) { |
| 96 | return ResultStatus::ErrorNullFile; | 97 | return {ResultStatus::ErrorNullFile, {}}; |
| 98 | } | ||
| 99 | |||
| 97 | dir = file->GetContainingDirectory(); | 100 | dir = file->GetContainingDirectory(); |
| 98 | } | 101 | } |
| 99 | 102 | ||
| 100 | // Read meta to determine title ID | 103 | // Read meta to determine title ID |
| 101 | FileSys::VirtualFile npdm = dir->GetFile("main.npdm"); | 104 | FileSys::VirtualFile npdm = dir->GetFile("main.npdm"); |
| 102 | if (npdm == nullptr) | 105 | if (npdm == nullptr) { |
| 103 | return ResultStatus::ErrorMissingNPDM; | 106 | return {ResultStatus::ErrorMissingNPDM, {}}; |
| 107 | } | ||
| 104 | 108 | ||
| 105 | ResultStatus result = metadata.Load(npdm); | 109 | const ResultStatus result = metadata.Load(npdm); |
| 106 | if (result != ResultStatus::Success) { | 110 | if (result != ResultStatus::Success) { |
| 107 | return result; | 111 | return {result, {}}; |
| 108 | } | 112 | } |
| 109 | 113 | ||
| 110 | if (override_update) { | 114 | if (override_update) { |
| @@ -114,23 +118,24 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process) | |||
| 114 | 118 | ||
| 115 | // Reread in case PatchExeFS affected the main.npdm | 119 | // Reread in case PatchExeFS affected the main.npdm |
| 116 | npdm = dir->GetFile("main.npdm"); | 120 | npdm = dir->GetFile("main.npdm"); |
| 117 | if (npdm == nullptr) | 121 | if (npdm == nullptr) { |
| 118 | return ResultStatus::ErrorMissingNPDM; | 122 | return {ResultStatus::ErrorMissingNPDM, {}}; |
| 123 | } | ||
| 119 | 124 | ||
| 120 | ResultStatus result2 = metadata.Load(npdm); | 125 | const ResultStatus result2 = metadata.Load(npdm); |
| 121 | if (result2 != ResultStatus::Success) { | 126 | if (result2 != ResultStatus::Success) { |
| 122 | return result2; | 127 | return {result2, {}}; |
| 123 | } | 128 | } |
| 124 | metadata.Print(); | 129 | metadata.Print(); |
| 125 | 130 | ||
| 126 | const FileSys::ProgramAddressSpaceType arch_bits{metadata.GetAddressSpaceType()}; | 131 | const FileSys::ProgramAddressSpaceType arch_bits{metadata.GetAddressSpaceType()}; |
| 127 | if (arch_bits == FileSys::ProgramAddressSpaceType::Is32Bit || | 132 | if (arch_bits == FileSys::ProgramAddressSpaceType::Is32Bit || |
| 128 | arch_bits == FileSys::ProgramAddressSpaceType::Is32BitNoMap) { | 133 | arch_bits == FileSys::ProgramAddressSpaceType::Is32BitNoMap) { |
| 129 | return ResultStatus::Error32BitISA; | 134 | return {ResultStatus::Error32BitISA, {}}; |
| 130 | } | 135 | } |
| 131 | 136 | ||
| 132 | if (process.LoadFromMetadata(metadata).IsError()) { | 137 | if (process.LoadFromMetadata(metadata).IsError()) { |
| 133 | return ResultStatus::ErrorUnableToParseKernelMetadata; | 138 | return {ResultStatus::ErrorUnableToParseKernelMetadata, {}}; |
| 134 | } | 139 | } |
| 135 | 140 | ||
| 136 | const FileSys::PatchManager pm(metadata.GetTitleID()); | 141 | const FileSys::PatchManager pm(metadata.GetTitleID()); |
| @@ -150,7 +155,7 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process) | |||
| 150 | const auto tentative_next_load_addr = | 155 | const auto tentative_next_load_addr = |
| 151 | AppLoader_NSO::LoadModule(process, *module_file, load_addr, should_pass_arguments, pm); | 156 | AppLoader_NSO::LoadModule(process, *module_file, load_addr, should_pass_arguments, pm); |
| 152 | if (!tentative_next_load_addr) { | 157 | if (!tentative_next_load_addr) { |
| 153 | return ResultStatus::ErrorLoadingNSO; | 158 | return {ResultStatus::ErrorLoadingNSO, {}}; |
| 154 | } | 159 | } |
| 155 | 160 | ||
| 156 | next_load_addr = *tentative_next_load_addr; | 161 | next_load_addr = *tentative_next_load_addr; |
| @@ -159,8 +164,6 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process) | |||
| 159 | GDBStub::RegisterModule(module, load_addr, next_load_addr - 1, false); | 164 | GDBStub::RegisterModule(module, load_addr, next_load_addr - 1, false); |
| 160 | } | 165 | } |
| 161 | 166 | ||
| 162 | process.Run(base_address, metadata.GetMainThreadPriority(), metadata.GetMainThreadStackSize()); | ||
| 163 | |||
| 164 | // Find the RomFS by searching for a ".romfs" file in this directory | 167 | // Find the RomFS by searching for a ".romfs" file in this directory |
| 165 | const auto& files = dir->GetFiles(); | 168 | const auto& files = dir->GetFiles(); |
| 166 | const auto romfs_iter = | 169 | const auto romfs_iter = |
| @@ -175,7 +178,8 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process) | |||
| 175 | } | 178 | } |
| 176 | 179 | ||
| 177 | is_loaded = true; | 180 | is_loaded = true; |
| 178 | return ResultStatus::Success; | 181 | return {ResultStatus::Success, |
| 182 | LoadParameters{metadata.GetMainThreadPriority(), metadata.GetMainThreadStackSize()}}; | ||
| 179 | } | 183 | } |
| 180 | 184 | ||
| 181 | ResultStatus AppLoader_DeconstructedRomDirectory::ReadRomFS(FileSys::VirtualFile& dir) { | 185 | ResultStatus AppLoader_DeconstructedRomDirectory::ReadRomFS(FileSys::VirtualFile& dir) { |
diff --git a/src/core/loader/deconstructed_rom_directory.h b/src/core/loader/deconstructed_rom_directory.h index 1615cb5a8..1a65c16a4 100644 --- a/src/core/loader/deconstructed_rom_directory.h +++ b/src/core/loader/deconstructed_rom_directory.h | |||
| @@ -37,7 +37,7 @@ public: | |||
| 37 | return IdentifyType(file); | 37 | return IdentifyType(file); |
| 38 | } | 38 | } |
| 39 | 39 | ||
| 40 | ResultStatus Load(Kernel::Process& process) override; | 40 | LoadResult Load(Kernel::Process& process) override; |
| 41 | 41 | ||
| 42 | ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override; | 42 | ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override; |
| 43 | ResultStatus ReadIcon(std::vector<u8>& buffer) override; | 43 | ResultStatus ReadIcon(std::vector<u8>& buffer) override; |
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp index 46ac372f6..6d4b02375 100644 --- a/src/core/loader/elf.cpp +++ b/src/core/loader/elf.cpp | |||
| @@ -382,13 +382,15 @@ FileType AppLoader_ELF::IdentifyType(const FileSys::VirtualFile& file) { | |||
| 382 | return FileType::Error; | 382 | return FileType::Error; |
| 383 | } | 383 | } |
| 384 | 384 | ||
| 385 | ResultStatus AppLoader_ELF::Load(Kernel::Process& process) { | 385 | AppLoader_ELF::LoadResult AppLoader_ELF::Load(Kernel::Process& process) { |
| 386 | if (is_loaded) | 386 | if (is_loaded) { |
| 387 | return ResultStatus::ErrorAlreadyLoaded; | 387 | return {ResultStatus::ErrorAlreadyLoaded, {}}; |
| 388 | } | ||
| 388 | 389 | ||
| 389 | std::vector<u8> buffer = file->ReadAllBytes(); | 390 | std::vector<u8> buffer = file->ReadAllBytes(); |
| 390 | if (buffer.size() != file->GetSize()) | 391 | if (buffer.size() != file->GetSize()) { |
| 391 | return ResultStatus::ErrorIncorrectELFFileSize; | 392 | return {ResultStatus::ErrorIncorrectELFFileSize, {}}; |
| 393 | } | ||
| 392 | 394 | ||
| 393 | const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); | 395 | const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); |
| 394 | ElfReader elf_reader(&buffer[0]); | 396 | ElfReader elf_reader(&buffer[0]); |
| @@ -396,10 +398,9 @@ ResultStatus AppLoader_ELF::Load(Kernel::Process& process) { | |||
| 396 | const VAddr entry_point = codeset.entrypoint; | 398 | const VAddr entry_point = codeset.entrypoint; |
| 397 | 399 | ||
| 398 | process.LoadModule(std::move(codeset), entry_point); | 400 | process.LoadModule(std::move(codeset), entry_point); |
| 399 | process.Run(entry_point, 48, Memory::DEFAULT_STACK_SIZE); | ||
| 400 | 401 | ||
| 401 | is_loaded = true; | 402 | is_loaded = true; |
| 402 | return ResultStatus::Success; | 403 | return {ResultStatus::Success, LoadParameters{48, Memory::DEFAULT_STACK_SIZE}}; |
| 403 | } | 404 | } |
| 404 | 405 | ||
| 405 | } // namespace Loader | 406 | } // namespace Loader |
diff --git a/src/core/loader/elf.h b/src/core/loader/elf.h index a2d33021c..7ef7770a6 100644 --- a/src/core/loader/elf.h +++ b/src/core/loader/elf.h | |||
| @@ -26,7 +26,7 @@ public: | |||
| 26 | return IdentifyType(file); | 26 | return IdentifyType(file); |
| 27 | } | 27 | } |
| 28 | 28 | ||
| 29 | ResultStatus Load(Kernel::Process& process) override; | 29 | LoadResult Load(Kernel::Process& process) override; |
| 30 | }; | 30 | }; |
| 31 | 31 | ||
| 32 | } // namespace Loader | 32 | } // namespace Loader |
diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h index bb925f4a6..f7846db52 100644 --- a/src/core/loader/loader.h +++ b/src/core/loader/loader.h | |||
| @@ -131,6 +131,12 @@ std::ostream& operator<<(std::ostream& os, ResultStatus status); | |||
| 131 | /// Interface for loading an application | 131 | /// Interface for loading an application |
| 132 | class AppLoader : NonCopyable { | 132 | class AppLoader : NonCopyable { |
| 133 | public: | 133 | public: |
| 134 | struct LoadParameters { | ||
| 135 | s32 main_thread_priority; | ||
| 136 | u64 main_thread_stack_size; | ||
| 137 | }; | ||
| 138 | using LoadResult = std::pair<ResultStatus, std::optional<LoadParameters>>; | ||
| 139 | |||
| 134 | explicit AppLoader(FileSys::VirtualFile file); | 140 | explicit AppLoader(FileSys::VirtualFile file); |
| 135 | virtual ~AppLoader(); | 141 | virtual ~AppLoader(); |
| 136 | 142 | ||
| @@ -145,7 +151,7 @@ public: | |||
| 145 | * @param process The newly created process. | 151 | * @param process The newly created process. |
| 146 | * @return The status result of the operation. | 152 | * @return The status result of the operation. |
| 147 | */ | 153 | */ |
| 148 | virtual ResultStatus Load(Kernel::Process& process) = 0; | 154 | virtual LoadResult Load(Kernel::Process& process) = 0; |
| 149 | 155 | ||
| 150 | /** | 156 | /** |
| 151 | * Loads the system mode that this application needs. | 157 | * Loads the system mode that this application needs. |
diff --git a/src/core/loader/nax.cpp b/src/core/loader/nax.cpp index 93a970d10..34efef09a 100644 --- a/src/core/loader/nax.cpp +++ b/src/core/loader/nax.cpp | |||
| @@ -41,31 +41,37 @@ FileType AppLoader_NAX::GetFileType() const { | |||
| 41 | return IdentifyTypeImpl(*nax); | 41 | return IdentifyTypeImpl(*nax); |
| 42 | } | 42 | } |
| 43 | 43 | ||
| 44 | ResultStatus AppLoader_NAX::Load(Kernel::Process& process) { | 44 | AppLoader_NAX::LoadResult AppLoader_NAX::Load(Kernel::Process& process) { |
| 45 | if (is_loaded) { | 45 | if (is_loaded) { |
| 46 | return ResultStatus::ErrorAlreadyLoaded; | 46 | return {ResultStatus::ErrorAlreadyLoaded, {}}; |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | if (nax->GetStatus() != ResultStatus::Success) | 49 | const auto nax_status = nax->GetStatus(); |
| 50 | return nax->GetStatus(); | 50 | if (nax_status != ResultStatus::Success) { |
| 51 | return {nax_status, {}}; | ||
| 52 | } | ||
| 51 | 53 | ||
| 52 | const auto nca = nax->AsNCA(); | 54 | const auto nca = nax->AsNCA(); |
| 53 | if (nca == nullptr) { | 55 | if (nca == nullptr) { |
| 54 | if (!Core::Crypto::KeyManager::KeyFileExists(false)) | 56 | if (!Core::Crypto::KeyManager::KeyFileExists(false)) { |
| 55 | return ResultStatus::ErrorMissingProductionKeyFile; | 57 | return {ResultStatus::ErrorMissingProductionKeyFile, {}}; |
| 56 | return ResultStatus::ErrorNAXInconvertibleToNCA; | 58 | } |
| 59 | |||
| 60 | return {ResultStatus::ErrorNAXInconvertibleToNCA, {}}; | ||
| 57 | } | 61 | } |
| 58 | 62 | ||
| 59 | if (nca->GetStatus() != ResultStatus::Success) | 63 | const auto nca_status = nca->GetStatus(); |
| 60 | return nca->GetStatus(); | 64 | if (nca_status != ResultStatus::Success) { |
| 65 | return {nca_status, {}}; | ||
| 66 | } | ||
| 61 | 67 | ||
| 62 | const auto result = nca_loader->Load(process); | 68 | const auto result = nca_loader->Load(process); |
| 63 | if (result != ResultStatus::Success) | 69 | if (result.first != ResultStatus::Success) { |
| 64 | return result; | 70 | return result; |
| 71 | } | ||
| 65 | 72 | ||
| 66 | is_loaded = true; | 73 | is_loaded = true; |
| 67 | 74 | return result; | |
| 68 | return ResultStatus::Success; | ||
| 69 | } | 75 | } |
| 70 | 76 | ||
| 71 | ResultStatus AppLoader_NAX::ReadRomFS(FileSys::VirtualFile& dir) { | 77 | ResultStatus AppLoader_NAX::ReadRomFS(FileSys::VirtualFile& dir) { |
diff --git a/src/core/loader/nax.h b/src/core/loader/nax.h index f40079574..00f1659c1 100644 --- a/src/core/loader/nax.h +++ b/src/core/loader/nax.h | |||
| @@ -33,7 +33,7 @@ public: | |||
| 33 | 33 | ||
| 34 | FileType GetFileType() const override; | 34 | FileType GetFileType() const override; |
| 35 | 35 | ||
| 36 | ResultStatus Load(Kernel::Process& process) override; | 36 | LoadResult Load(Kernel::Process& process) override; |
| 37 | 37 | ||
| 38 | ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override; | 38 | ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override; |
| 39 | u64 ReadRomFSIVFCOffset() const override; | 39 | u64 ReadRomFSIVFCOffset() const override; |
diff --git a/src/core/loader/nca.cpp b/src/core/loader/nca.cpp index ce8196fcf..b3f8f1083 100644 --- a/src/core/loader/nca.cpp +++ b/src/core/loader/nca.cpp | |||
| @@ -30,36 +30,38 @@ FileType AppLoader_NCA::IdentifyType(const FileSys::VirtualFile& file) { | |||
| 30 | return FileType::Error; | 30 | return FileType::Error; |
| 31 | } | 31 | } |
| 32 | 32 | ||
| 33 | ResultStatus AppLoader_NCA::Load(Kernel::Process& process) { | 33 | AppLoader_NCA::LoadResult AppLoader_NCA::Load(Kernel::Process& process) { |
| 34 | if (is_loaded) { | 34 | if (is_loaded) { |
| 35 | return ResultStatus::ErrorAlreadyLoaded; | 35 | return {ResultStatus::ErrorAlreadyLoaded, {}}; |
| 36 | } | 36 | } |
| 37 | 37 | ||
| 38 | const auto result = nca->GetStatus(); | 38 | const auto result = nca->GetStatus(); |
| 39 | if (result != ResultStatus::Success) { | 39 | if (result != ResultStatus::Success) { |
| 40 | return result; | 40 | return {result, {}}; |
| 41 | } | 41 | } |
| 42 | 42 | ||
| 43 | if (nca->GetType() != FileSys::NCAContentType::Program) | 43 | if (nca->GetType() != FileSys::NCAContentType::Program) { |
| 44 | return ResultStatus::ErrorNCANotProgram; | 44 | return {ResultStatus::ErrorNCANotProgram, {}}; |
| 45 | } | ||
| 45 | 46 | ||
| 46 | const auto exefs = nca->GetExeFS(); | 47 | const auto exefs = nca->GetExeFS(); |
| 47 | 48 | if (exefs == nullptr) { | |
| 48 | if (exefs == nullptr) | 49 | return {ResultStatus::ErrorNoExeFS, {}}; |
| 49 | return ResultStatus::ErrorNoExeFS; | 50 | } |
| 50 | 51 | ||
| 51 | directory_loader = std::make_unique<AppLoader_DeconstructedRomDirectory>(exefs, true); | 52 | directory_loader = std::make_unique<AppLoader_DeconstructedRomDirectory>(exefs, true); |
| 52 | 53 | ||
| 53 | const auto load_result = directory_loader->Load(process); | 54 | const auto load_result = directory_loader->Load(process); |
| 54 | if (load_result != ResultStatus::Success) | 55 | if (load_result.first != ResultStatus::Success) { |
| 55 | return load_result; | 56 | return load_result; |
| 57 | } | ||
| 56 | 58 | ||
| 57 | if (nca->GetRomFS() != nullptr && nca->GetRomFS()->GetSize() > 0) | 59 | if (nca->GetRomFS() != nullptr && nca->GetRomFS()->GetSize() > 0) { |
| 58 | Service::FileSystem::RegisterRomFS(std::make_unique<FileSys::RomFSFactory>(*this)); | 60 | Service::FileSystem::RegisterRomFS(std::make_unique<FileSys::RomFSFactory>(*this)); |
| 61 | } | ||
| 59 | 62 | ||
| 60 | is_loaded = true; | 63 | is_loaded = true; |
| 61 | 64 | return load_result; | |
| 62 | return ResultStatus::Success; | ||
| 63 | } | 65 | } |
| 64 | 66 | ||
| 65 | ResultStatus AppLoader_NCA::ReadRomFS(FileSys::VirtualFile& dir) { | 67 | ResultStatus AppLoader_NCA::ReadRomFS(FileSys::VirtualFile& dir) { |
diff --git a/src/core/loader/nca.h b/src/core/loader/nca.h index b9f077468..94f0ed677 100644 --- a/src/core/loader/nca.h +++ b/src/core/loader/nca.h | |||
| @@ -33,7 +33,7 @@ public: | |||
| 33 | return IdentifyType(file); | 33 | return IdentifyType(file); |
| 34 | } | 34 | } |
| 35 | 35 | ||
| 36 | ResultStatus Load(Kernel::Process& process) override; | 36 | LoadResult Load(Kernel::Process& process) override; |
| 37 | 37 | ||
| 38 | ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override; | 38 | ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override; |
| 39 | u64 ReadRomFSIVFCOffset() const override; | 39 | u64 ReadRomFSIVFCOffset() const override; |
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp index 31e4a0c84..6a0ca389b 100644 --- a/src/core/loader/nro.cpp +++ b/src/core/loader/nro.cpp | |||
| @@ -201,25 +201,25 @@ bool AppLoader_NRO::LoadNro(Kernel::Process& process, const FileSys::VfsFile& fi | |||
| 201 | return LoadNroImpl(process, file.ReadAllBytes(), file.GetName(), load_base); | 201 | return LoadNroImpl(process, file.ReadAllBytes(), file.GetName(), load_base); |
| 202 | } | 202 | } |
| 203 | 203 | ||
| 204 | ResultStatus AppLoader_NRO::Load(Kernel::Process& process) { | 204 | AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::Process& process) { |
| 205 | if (is_loaded) { | 205 | if (is_loaded) { |
| 206 | return ResultStatus::ErrorAlreadyLoaded; | 206 | return {ResultStatus::ErrorAlreadyLoaded, {}}; |
| 207 | } | 207 | } |
| 208 | 208 | ||
| 209 | // Load NRO | 209 | // Load NRO |
| 210 | const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); | 210 | const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); |
| 211 | 211 | ||
| 212 | if (!LoadNro(process, *file, base_address)) { | 212 | if (!LoadNro(process, *file, base_address)) { |
| 213 | return ResultStatus::ErrorLoadingNRO; | 213 | return {ResultStatus::ErrorLoadingNRO, {}}; |
| 214 | } | 214 | } |
| 215 | 215 | ||
| 216 | if (romfs != nullptr) | 216 | if (romfs != nullptr) { |
| 217 | Service::FileSystem::RegisterRomFS(std::make_unique<FileSys::RomFSFactory>(*this)); | 217 | Service::FileSystem::RegisterRomFS(std::make_unique<FileSys::RomFSFactory>(*this)); |
| 218 | 218 | } | |
| 219 | process.Run(base_address, Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE); | ||
| 220 | 219 | ||
| 221 | is_loaded = true; | 220 | is_loaded = true; |
| 222 | return ResultStatus::Success; | 221 | return {ResultStatus::Success, |
| 222 | LoadParameters{Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE}}; | ||
| 223 | } | 223 | } |
| 224 | 224 | ||
| 225 | ResultStatus AppLoader_NRO::ReadIcon(std::vector<u8>& buffer) { | 225 | ResultStatus AppLoader_NRO::ReadIcon(std::vector<u8>& buffer) { |
diff --git a/src/core/loader/nro.h b/src/core/loader/nro.h index 85b0ed644..1ffdae805 100644 --- a/src/core/loader/nro.h +++ b/src/core/loader/nro.h | |||
| @@ -37,7 +37,7 @@ public: | |||
| 37 | return IdentifyType(file); | 37 | return IdentifyType(file); |
| 38 | } | 38 | } |
| 39 | 39 | ||
| 40 | ResultStatus Load(Kernel::Process& process) override; | 40 | LoadResult Load(Kernel::Process& process) override; |
| 41 | 41 | ||
| 42 | ResultStatus ReadIcon(std::vector<u8>& buffer) override; | 42 | ResultStatus ReadIcon(std::vector<u8>& buffer) override; |
| 43 | ResultStatus ReadProgramId(u64& out_program_id) override; | 43 | ResultStatus ReadProgramId(u64& out_program_id) override; |
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp index d7c47c197..a86653204 100644 --- a/src/core/loader/nso.cpp +++ b/src/core/loader/nso.cpp | |||
| @@ -169,22 +169,21 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process, | |||
| 169 | return load_base + image_size; | 169 | return load_base + image_size; |
| 170 | } | 170 | } |
| 171 | 171 | ||
| 172 | ResultStatus AppLoader_NSO::Load(Kernel::Process& process) { | 172 | AppLoader_NSO::LoadResult AppLoader_NSO::Load(Kernel::Process& process) { |
| 173 | if (is_loaded) { | 173 | if (is_loaded) { |
| 174 | return ResultStatus::ErrorAlreadyLoaded; | 174 | return {ResultStatus::ErrorAlreadyLoaded, {}}; |
| 175 | } | 175 | } |
| 176 | 176 | ||
| 177 | // Load module | 177 | // Load module |
| 178 | const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); | 178 | const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); |
| 179 | if (!LoadModule(process, *file, base_address, true)) { | 179 | if (!LoadModule(process, *file, base_address, true)) { |
| 180 | return ResultStatus::ErrorLoadingNSO; | 180 | return {ResultStatus::ErrorLoadingNSO, {}}; |
| 181 | } | 181 | } |
| 182 | LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", file->GetName(), base_address); | 182 | LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", file->GetName(), base_address); |
| 183 | 183 | ||
| 184 | process.Run(base_address, Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE); | ||
| 185 | |||
| 186 | is_loaded = true; | 184 | is_loaded = true; |
| 187 | return ResultStatus::Success; | 185 | return {ResultStatus::Success, |
| 186 | LoadParameters{Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE}}; | ||
| 188 | } | 187 | } |
| 189 | 188 | ||
| 190 | } // namespace Loader | 189 | } // namespace Loader |
diff --git a/src/core/loader/nso.h b/src/core/loader/nso.h index 4674c3724..fdce9191c 100644 --- a/src/core/loader/nso.h +++ b/src/core/loader/nso.h | |||
| @@ -84,7 +84,7 @@ public: | |||
| 84 | VAddr load_base, bool should_pass_arguments, | 84 | VAddr load_base, bool should_pass_arguments, |
| 85 | std::optional<FileSys::PatchManager> pm = {}); | 85 | std::optional<FileSys::PatchManager> pm = {}); |
| 86 | 86 | ||
| 87 | ResultStatus Load(Kernel::Process& process) override; | 87 | LoadResult Load(Kernel::Process& process) override; |
| 88 | }; | 88 | }; |
| 89 | 89 | ||
| 90 | } // namespace Loader | 90 | } // namespace Loader |
diff --git a/src/core/loader/nsp.cpp b/src/core/loader/nsp.cpp index 7da1f8960..ad56bbb38 100644 --- a/src/core/loader/nsp.cpp +++ b/src/core/loader/nsp.cpp | |||
| @@ -72,37 +72,45 @@ FileType AppLoader_NSP::IdentifyType(const FileSys::VirtualFile& file) { | |||
| 72 | return FileType::Error; | 72 | return FileType::Error; |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | ResultStatus AppLoader_NSP::Load(Kernel::Process& process) { | 75 | AppLoader_NSP::LoadResult AppLoader_NSP::Load(Kernel::Process& process) { |
| 76 | if (is_loaded) { | 76 | if (is_loaded) { |
| 77 | return ResultStatus::ErrorAlreadyLoaded; | 77 | return {ResultStatus::ErrorAlreadyLoaded, {}}; |
| 78 | } | 78 | } |
| 79 | 79 | ||
| 80 | if (title_id == 0) | 80 | if (title_id == 0) { |
| 81 | return ResultStatus::ErrorNSPMissingProgramNCA; | 81 | return {ResultStatus::ErrorNSPMissingProgramNCA, {}}; |
| 82 | } | ||
| 82 | 83 | ||
| 83 | if (nsp->GetStatus() != ResultStatus::Success) | 84 | const auto nsp_status = nsp->GetStatus(); |
| 84 | return nsp->GetStatus(); | 85 | if (nsp_status != ResultStatus::Success) { |
| 86 | return {nsp_status, {}}; | ||
| 87 | } | ||
| 85 | 88 | ||
| 86 | if (nsp->GetProgramStatus(title_id) != ResultStatus::Success) | 89 | const auto nsp_program_status = nsp->GetProgramStatus(title_id); |
| 87 | return nsp->GetProgramStatus(title_id); | 90 | if (nsp_program_status != ResultStatus::Success) { |
| 91 | return {nsp_program_status, {}}; | ||
| 92 | } | ||
| 88 | 93 | ||
| 89 | if (nsp->GetNCA(title_id, FileSys::ContentRecordType::Program) == nullptr) { | 94 | if (nsp->GetNCA(title_id, FileSys::ContentRecordType::Program) == nullptr) { |
| 90 | if (!Core::Crypto::KeyManager::KeyFileExists(false)) | 95 | if (!Core::Crypto::KeyManager::KeyFileExists(false)) { |
| 91 | return ResultStatus::ErrorMissingProductionKeyFile; | 96 | return {ResultStatus::ErrorMissingProductionKeyFile, {}}; |
| 92 | return ResultStatus::ErrorNSPMissingProgramNCA; | 97 | } |
| 98 | |||
| 99 | return {ResultStatus::ErrorNSPMissingProgramNCA, {}}; | ||
| 93 | } | 100 | } |
| 94 | 101 | ||
| 95 | const auto result = secondary_loader->Load(process); | 102 | const auto result = secondary_loader->Load(process); |
| 96 | if (result != ResultStatus::Success) | 103 | if (result.first != ResultStatus::Success) { |
| 97 | return result; | 104 | return result; |
| 105 | } | ||
| 98 | 106 | ||
| 99 | FileSys::VirtualFile update_raw; | 107 | FileSys::VirtualFile update_raw; |
| 100 | if (ReadUpdateRaw(update_raw) == ResultStatus::Success && update_raw != nullptr) | 108 | if (ReadUpdateRaw(update_raw) == ResultStatus::Success && update_raw != nullptr) { |
| 101 | Service::FileSystem::SetPackedUpdate(std::move(update_raw)); | 109 | Service::FileSystem::SetPackedUpdate(std::move(update_raw)); |
| 110 | } | ||
| 102 | 111 | ||
| 103 | is_loaded = true; | 112 | is_loaded = true; |
| 104 | 113 | return result; | |
| 105 | return ResultStatus::Success; | ||
| 106 | } | 114 | } |
| 107 | 115 | ||
| 108 | ResultStatus AppLoader_NSP::ReadRomFS(FileSys::VirtualFile& file) { | 116 | ResultStatus AppLoader_NSP::ReadRomFS(FileSys::VirtualFile& file) { |
diff --git a/src/core/loader/nsp.h b/src/core/loader/nsp.h index 953a1b508..85e870bdf 100644 --- a/src/core/loader/nsp.h +++ b/src/core/loader/nsp.h | |||
| @@ -35,7 +35,7 @@ public: | |||
| 35 | return IdentifyType(file); | 35 | return IdentifyType(file); |
| 36 | } | 36 | } |
| 37 | 37 | ||
| 38 | ResultStatus Load(Kernel::Process& process) override; | 38 | LoadResult Load(Kernel::Process& process) override; |
| 39 | 39 | ||
| 40 | ResultStatus ReadRomFS(FileSys::VirtualFile& file) override; | 40 | ResultStatus ReadRomFS(FileSys::VirtualFile& file) override; |
| 41 | u64 ReadRomFSIVFCOffset() const override; | 41 | u64 ReadRomFSIVFCOffset() const override; |
diff --git a/src/core/loader/xci.cpp b/src/core/loader/xci.cpp index 89f7bbf77..1e285a053 100644 --- a/src/core/loader/xci.cpp +++ b/src/core/loader/xci.cpp | |||
| @@ -48,31 +48,35 @@ FileType AppLoader_XCI::IdentifyType(const FileSys::VirtualFile& file) { | |||
| 48 | return FileType::Error; | 48 | return FileType::Error; |
| 49 | } | 49 | } |
| 50 | 50 | ||
| 51 | ResultStatus AppLoader_XCI::Load(Kernel::Process& process) { | 51 | AppLoader_XCI::LoadResult AppLoader_XCI::Load(Kernel::Process& process) { |
| 52 | if (is_loaded) { | 52 | if (is_loaded) { |
| 53 | return ResultStatus::ErrorAlreadyLoaded; | 53 | return {ResultStatus::ErrorAlreadyLoaded, {}}; |
| 54 | } | 54 | } |
| 55 | 55 | ||
| 56 | if (xci->GetStatus() != ResultStatus::Success) | 56 | if (xci->GetStatus() != ResultStatus::Success) { |
| 57 | return xci->GetStatus(); | 57 | return {xci->GetStatus(), {}}; |
| 58 | } | ||
| 58 | 59 | ||
| 59 | if (xci->GetProgramNCAStatus() != ResultStatus::Success) | 60 | if (xci->GetProgramNCAStatus() != ResultStatus::Success) { |
| 60 | return xci->GetProgramNCAStatus(); | 61 | return {xci->GetProgramNCAStatus(), {}}; |
| 62 | } | ||
| 61 | 63 | ||
| 62 | if (!xci->HasProgramNCA() && !Core::Crypto::KeyManager::KeyFileExists(false)) | 64 | if (!xci->HasProgramNCA() && !Core::Crypto::KeyManager::KeyFileExists(false)) { |
| 63 | return ResultStatus::ErrorMissingProductionKeyFile; | 65 | return {ResultStatus::ErrorMissingProductionKeyFile, {}}; |
| 66 | } | ||
| 64 | 67 | ||
| 65 | const auto result = nca_loader->Load(process); | 68 | const auto result = nca_loader->Load(process); |
| 66 | if (result != ResultStatus::Success) | 69 | if (result.first != ResultStatus::Success) { |
| 67 | return result; | 70 | return result; |
| 71 | } | ||
| 68 | 72 | ||
| 69 | FileSys::VirtualFile update_raw; | 73 | FileSys::VirtualFile update_raw; |
| 70 | if (ReadUpdateRaw(update_raw) == ResultStatus::Success && update_raw != nullptr) | 74 | if (ReadUpdateRaw(update_raw) == ResultStatus::Success && update_raw != nullptr) { |
| 71 | Service::FileSystem::SetPackedUpdate(std::move(update_raw)); | 75 | Service::FileSystem::SetPackedUpdate(std::move(update_raw)); |
| 76 | } | ||
| 72 | 77 | ||
| 73 | is_loaded = true; | 78 | is_loaded = true; |
| 74 | 79 | return result; | |
| 75 | return ResultStatus::Success; | ||
| 76 | } | 80 | } |
| 77 | 81 | ||
| 78 | ResultStatus AppLoader_XCI::ReadRomFS(FileSys::VirtualFile& file) { | 82 | ResultStatus AppLoader_XCI::ReadRomFS(FileSys::VirtualFile& file) { |
diff --git a/src/core/loader/xci.h b/src/core/loader/xci.h index 436f7387c..ae7145b14 100644 --- a/src/core/loader/xci.h +++ b/src/core/loader/xci.h | |||
| @@ -35,7 +35,7 @@ public: | |||
| 35 | return IdentifyType(file); | 35 | return IdentifyType(file); |
| 36 | } | 36 | } |
| 37 | 37 | ||
| 38 | ResultStatus Load(Kernel::Process& process) override; | 38 | LoadResult Load(Kernel::Process& process) override; |
| 39 | 39 | ||
| 40 | ResultStatus ReadRomFS(FileSys::VirtualFile& file) override; | 40 | ResultStatus ReadRomFS(FileSys::VirtualFile& file) override; |
| 41 | u64 ReadRomFSIVFCOffset() const override; | 41 | u64 ReadRomFSIVFCOffset() const override; |
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 4e0538bc2..f18f6226b 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -26,16 +26,16 @@ namespace Memory { | |||
| 26 | 26 | ||
| 27 | static Common::PageTable* current_page_table = nullptr; | 27 | static Common::PageTable* current_page_table = nullptr; |
| 28 | 28 | ||
| 29 | void SetCurrentPageTable(Common::PageTable* page_table) { | 29 | void SetCurrentPageTable(Kernel::Process& process) { |
| 30 | current_page_table = page_table; | 30 | current_page_table = &process.VMManager().page_table; |
| 31 | |||
| 32 | const std::size_t address_space_width = process.VMManager().GetAddressSpaceWidth(); | ||
| 31 | 33 | ||
| 32 | auto& system = Core::System::GetInstance(); | 34 | auto& system = Core::System::GetInstance(); |
| 33 | if (system.IsPoweredOn()) { | 35 | system.ArmInterface(0).PageTableChanged(*current_page_table, address_space_width); |
| 34 | system.ArmInterface(0).PageTableChanged(); | 36 | system.ArmInterface(1).PageTableChanged(*current_page_table, address_space_width); |
| 35 | system.ArmInterface(1).PageTableChanged(); | 37 | system.ArmInterface(2).PageTableChanged(*current_page_table, address_space_width); |
| 36 | system.ArmInterface(2).PageTableChanged(); | 38 | system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width); |
| 37 | system.ArmInterface(3).PageTableChanged(); | ||
| 38 | } | ||
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory, | 41 | static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory, |
diff --git a/src/core/memory.h b/src/core/memory.h index 6845f5fe1..b9fa18b1d 100644 --- a/src/core/memory.h +++ b/src/core/memory.h | |||
| @@ -40,8 +40,9 @@ enum : VAddr { | |||
| 40 | KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE, | 40 | KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE, |
| 41 | }; | 41 | }; |
| 42 | 42 | ||
| 43 | /// Changes the currently active page table. | 43 | /// Changes the currently active page table to that of |
| 44 | void SetCurrentPageTable(Common::PageTable* page_table); | 44 | /// the given process instance. |
| 45 | void SetCurrentPageTable(Kernel::Process& process); | ||
| 45 | 46 | ||
| 46 | /// Determines if the given VAddr is valid for the specified process. | 47 | /// Determines if the given VAddr is valid for the specified process. |
| 47 | bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr); | 48 | bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr); |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 1e31a2900..6821f275d 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -36,6 +36,8 @@ add_library(video_core STATIC | |||
| 36 | renderer_base.h | 36 | renderer_base.h |
| 37 | renderer_opengl/gl_buffer_cache.cpp | 37 | renderer_opengl/gl_buffer_cache.cpp |
| 38 | renderer_opengl/gl_buffer_cache.h | 38 | renderer_opengl/gl_buffer_cache.h |
| 39 | renderer_opengl/gl_device.cpp | ||
| 40 | renderer_opengl/gl_device.h | ||
| 39 | renderer_opengl/gl_global_cache.cpp | 41 | renderer_opengl/gl_global_cache.cpp |
| 40 | renderer_opengl/gl_global_cache.h | 42 | renderer_opengl/gl_global_cache.h |
| 41 | renderer_opengl/gl_primitive_assembler.cpp | 43 | renderer_opengl/gl_primitive_assembler.cpp |
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 046d047cb..6674d9405 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -57,8 +57,8 @@ bool DmaPusher::Step() { | |||
| 57 | 57 | ||
| 58 | // Push buffer non-empty, read a word | 58 | // Push buffer non-empty, read a word |
| 59 | command_headers.resize(command_list_header.size); | 59 | command_headers.resize(command_list_header.size); |
| 60 | gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(), | 60 | gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(), |
| 61 | command_list_header.size * sizeof(u32)); | 61 | command_list_header.size * sizeof(u32)); |
| 62 | 62 | ||
| 63 | for (const CommandHeader& command_header : command_headers) { | 63 | for (const CommandHeader& command_header : command_headers) { |
| 64 | 64 | ||
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index cd51a31d7..7387886a3 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "video_core/memory_manager.h" | 10 | #include "video_core/memory_manager.h" |
| 11 | #include "video_core/rasterizer_interface.h" | 11 | #include "video_core/rasterizer_interface.h" |
| 12 | #include "video_core/renderer_base.h" | 12 | #include "video_core/renderer_base.h" |
| 13 | #include "video_core/textures/decoders.h" | ||
| 13 | 14 | ||
| 14 | namespace Tegra::Engines { | 15 | namespace Tegra::Engines { |
| 15 | 16 | ||
| @@ -27,30 +28,46 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { | |||
| 27 | 28 | ||
| 28 | switch (method_call.method) { | 29 | switch (method_call.method) { |
| 29 | case KEPLERMEMORY_REG_INDEX(exec): { | 30 | case KEPLERMEMORY_REG_INDEX(exec): { |
| 30 | state.write_offset = 0; | 31 | ProcessExec(); |
| 31 | break; | 32 | break; |
| 32 | } | 33 | } |
| 33 | case KEPLERMEMORY_REG_INDEX(data): { | 34 | case KEPLERMEMORY_REG_INDEX(data): { |
| 34 | ProcessData(method_call.argument); | 35 | ProcessData(method_call.argument, method_call.IsLastCall()); |
| 35 | break; | 36 | break; |
| 36 | } | 37 | } |
| 37 | } | 38 | } |
| 38 | } | 39 | } |
| 39 | 40 | ||
| 40 | void KeplerMemory::ProcessData(u32 data) { | 41 | void KeplerMemory::ProcessExec() { |
| 41 | ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported"); | 42 | state.write_offset = 0; |
| 42 | ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0); | 43 | state.copy_size = regs.line_length_in * regs.line_count; |
| 43 | 44 | state.inner_buffer.resize(state.copy_size); | |
| 44 | // We have to invalidate the destination region to evict any outdated surfaces from the cache. | 45 | } |
| 45 | // We do this before actually writing the new data because the destination address might | ||
| 46 | // contain a dirty surface that will have to be written back to memory. | ||
| 47 | const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)}; | ||
| 48 | rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32)); | ||
| 49 | memory_manager.Write<u32>(address, data); | ||
| 50 | |||
| 51 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 52 | 46 | ||
| 53 | state.write_offset++; | 47 | void KeplerMemory::ProcessData(u32 data, bool is_last_call) { |
| 48 | const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset); | ||
| 49 | std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size); | ||
| 50 | state.write_offset += sub_copy_size; | ||
| 51 | if (is_last_call) { | ||
| 52 | const GPUVAddr address{regs.dest.Address()}; | ||
| 53 | if (regs.exec.linear != 0) { | ||
| 54 | memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size); | ||
| 55 | } else { | ||
| 56 | UNIMPLEMENTED_IF(regs.dest.z != 0); | ||
| 57 | UNIMPLEMENTED_IF(regs.dest.depth != 1); | ||
| 58 | UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); | ||
| 59 | UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); | ||
| 60 | const std::size_t dst_size = Tegra::Texture::CalculateSize( | ||
| 61 | true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); | ||
| 62 | std::vector<u8> tmp_buffer(dst_size); | ||
| 63 | memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); | ||
| 64 | Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, | ||
| 65 | regs.dest.y, regs.dest.BlockHeight(), state.copy_size, | ||
| 66 | state.inner_buffer.data(), tmp_buffer.data()); | ||
| 67 | memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); | ||
| 68 | } | ||
| 69 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 70 | } | ||
| 54 | } | 71 | } |
| 55 | 72 | ||
| 56 | } // namespace Tegra::Engines | 73 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index 78b6c3e45..5f892ddad 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstddef> | 8 | #include <cstddef> |
| 9 | #include <vector> | ||
| 9 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 10 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| 11 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| @@ -51,7 +52,11 @@ public: | |||
| 51 | u32 address_high; | 52 | u32 address_high; |
| 52 | u32 address_low; | 53 | u32 address_low; |
| 53 | u32 pitch; | 54 | u32 pitch; |
| 54 | u32 block_dimensions; | 55 | union { |
| 56 | BitField<0, 4, u32> block_width; | ||
| 57 | BitField<4, 4, u32> block_height; | ||
| 58 | BitField<8, 4, u32> block_depth; | ||
| 59 | }; | ||
| 55 | u32 width; | 60 | u32 width; |
| 56 | u32 height; | 61 | u32 height; |
| 57 | u32 depth; | 62 | u32 depth; |
| @@ -63,6 +68,18 @@ public: | |||
| 63 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | 68 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | |
| 64 | address_low); | 69 | address_low); |
| 65 | } | 70 | } |
| 71 | |||
| 72 | u32 BlockWidth() const { | ||
| 73 | return 1U << block_width.Value(); | ||
| 74 | } | ||
| 75 | |||
| 76 | u32 BlockHeight() const { | ||
| 77 | return 1U << block_height.Value(); | ||
| 78 | } | ||
| 79 | |||
| 80 | u32 BlockDepth() const { | ||
| 81 | return 1U << block_depth.Value(); | ||
| 82 | } | ||
| 66 | } dest; | 83 | } dest; |
| 67 | 84 | ||
| 68 | struct { | 85 | struct { |
| @@ -81,6 +98,8 @@ public: | |||
| 81 | 98 | ||
| 82 | struct { | 99 | struct { |
| 83 | u32 write_offset = 0; | 100 | u32 write_offset = 0; |
| 101 | u32 copy_size = 0; | ||
| 102 | std::vector<u8> inner_buffer; | ||
| 84 | } state{}; | 103 | } state{}; |
| 85 | 104 | ||
| 86 | private: | 105 | private: |
| @@ -88,7 +107,8 @@ private: | |||
| 88 | VideoCore::RasterizerInterface& rasterizer; | 107 | VideoCore::RasterizerInterface& rasterizer; |
| 89 | MemoryManager& memory_manager; | 108 | MemoryManager& memory_manager; |
| 90 | 109 | ||
| 91 | void ProcessData(u32 data); | 110 | void ProcessExec(); |
| 111 | void ProcessData(u32 data, bool is_last_call); | ||
| 92 | }; | 112 | }; |
| 93 | 113 | ||
| 94 | #define ASSERT_REG_POSITION(field_name, position) \ | 114 | #define ASSERT_REG_POSITION(field_name, position) \ |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index b198793bc..9780417f2 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -418,7 +418,7 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | |||
| 418 | const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)}; | 418 | const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)}; |
| 419 | 419 | ||
| 420 | Texture::TICEntry tic_entry; | 420 | Texture::TICEntry tic_entry; |
| 421 | memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); | 421 | memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); |
| 422 | 422 | ||
| 423 | ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || | 423 | ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || |
| 424 | tic_entry.header_version == Texture::TICHeaderVersion::Pitch, | 424 | tic_entry.header_version == Texture::TICHeaderVersion::Pitch, |
| @@ -439,7 +439,7 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { | |||
| 439 | const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)}; | 439 | const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)}; |
| 440 | 440 | ||
| 441 | Texture::TSCEntry tsc_entry; | 441 | Texture::TSCEntry tsc_entry; |
| 442 | memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); | 442 | memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); |
| 443 | return tsc_entry; | 443 | return tsc_entry; |
| 444 | } | 444 | } |
| 445 | 445 | ||
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index fce9733b9..e5b4eadea 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -937,21 +937,34 @@ union Instruction { | |||
| 937 | } iset; | 937 | } iset; |
| 938 | 938 | ||
| 939 | union { | 939 | union { |
| 940 | BitField<8, 2, Register::Size> dest_size; | 940 | BitField<41, 2, u64> selector; // i2i and i2f only |
| 941 | BitField<10, 2, Register::Size> src_size; | ||
| 942 | BitField<12, 1, u64> is_output_signed; | ||
| 943 | BitField<13, 1, u64> is_input_signed; | ||
| 944 | BitField<41, 2, u64> selector; | ||
| 945 | BitField<45, 1, u64> negate_a; | 941 | BitField<45, 1, u64> negate_a; |
| 946 | BitField<49, 1, u64> abs_a; | 942 | BitField<49, 1, u64> abs_a; |
| 943 | BitField<10, 2, Register::Size> src_size; | ||
| 944 | BitField<13, 1, u64> is_input_signed; | ||
| 945 | BitField<8, 2, Register::Size> dst_size; | ||
| 946 | BitField<12, 1, u64> is_output_signed; | ||
| 947 | |||
| 948 | union { | ||
| 949 | BitField<39, 2, u64> tab5cb8_2; | ||
| 950 | } i2f; | ||
| 947 | 951 | ||
| 948 | union { | 952 | union { |
| 949 | BitField<39, 2, F2iRoundingOp> rounding; | 953 | BitField<39, 2, F2iRoundingOp> rounding; |
| 950 | } f2i; | 954 | } f2i; |
| 951 | 955 | ||
| 952 | union { | 956 | union { |
| 953 | BitField<39, 4, F2fRoundingOp> rounding; | 957 | BitField<8, 2, Register::Size> src_size; |
| 958 | BitField<10, 2, Register::Size> dst_size; | ||
| 959 | BitField<39, 4, u64> rounding; | ||
| 960 | // H0, H1 extract for F16 missing | ||
| 961 | BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value | ||
| 962 | F2fRoundingOp GetRoundingMode() const { | ||
| 963 | constexpr u64 rounding_mask = 0x0B; | ||
| 964 | return static_cast<F2fRoundingOp>(rounding.Value() & rounding_mask); | ||
| 965 | } | ||
| 954 | } f2f; | 966 | } f2f; |
| 967 | |||
| 955 | } conversion; | 968 | } conversion; |
| 956 | 969 | ||
| 957 | union { | 970 | union { |
| @@ -1734,7 +1747,7 @@ private: | |||
| 1734 | INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"), | 1747 | INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"), |
| 1735 | INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"), | 1748 | INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"), |
| 1736 | INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"), | 1749 | INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"), |
| 1737 | INST("01110001-1000---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), | 1750 | INST("0011101-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), |
| 1738 | INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"), | 1751 | INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"), |
| 1739 | INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"), | 1752 | INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"), |
| 1740 | INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"), | 1753 | INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"), |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index de30ea354..fe6628923 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -207,6 +207,11 @@ public: | |||
| 207 | }; | 207 | }; |
| 208 | } regs{}; | 208 | } regs{}; |
| 209 | 209 | ||
| 210 | /// Performs any additional setup necessary in order to begin GPU emulation. | ||
| 211 | /// This can be used to launch any necessary threads and register any necessary | ||
| 212 | /// core timing events. | ||
| 213 | virtual void Start() = 0; | ||
| 214 | |||
| 210 | /// Push GPU command entries to be processed | 215 | /// Push GPU command entries to be processed |
| 211 | virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; | 216 | virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; |
| 212 | 217 | ||
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index db507cf04..d4e2553a9 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp | |||
| @@ -9,10 +9,14 @@ | |||
| 9 | namespace VideoCommon { | 9 | namespace VideoCommon { |
| 10 | 10 | ||
| 11 | GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) | 11 | GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) |
| 12 | : Tegra::GPU(system, renderer), gpu_thread{system, renderer, *dma_pusher} {} | 12 | : GPU(system, renderer), gpu_thread{system} {} |
| 13 | 13 | ||
| 14 | GPUAsynch::~GPUAsynch() = default; | 14 | GPUAsynch::~GPUAsynch() = default; |
| 15 | 15 | ||
| 16 | void GPUAsynch::Start() { | ||
| 17 | gpu_thread.StartThread(renderer, *dma_pusher); | ||
| 18 | } | ||
| 19 | |||
| 16 | void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { | 20 | void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { |
| 17 | gpu_thread.SubmitList(std::move(entries)); | 21 | gpu_thread.SubmitList(std::move(entries)); |
| 18 | } | 22 | } |
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 1dcc61a6c..30be74cba 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h | |||
| @@ -13,16 +13,13 @@ class RendererBase; | |||
| 13 | 13 | ||
| 14 | namespace VideoCommon { | 14 | namespace VideoCommon { |
| 15 | 15 | ||
| 16 | namespace GPUThread { | ||
| 17 | class ThreadManager; | ||
| 18 | } // namespace GPUThread | ||
| 19 | |||
| 20 | /// Implementation of GPU interface that runs the GPU asynchronously | 16 | /// Implementation of GPU interface that runs the GPU asynchronously |
| 21 | class GPUAsynch : public Tegra::GPU { | 17 | class GPUAsynch : public Tegra::GPU { |
| 22 | public: | 18 | public: |
| 23 | explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer); | 19 | explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer); |
| 24 | ~GPUAsynch() override; | 20 | ~GPUAsynch() override; |
| 25 | 21 | ||
| 22 | void Start() override; | ||
| 26 | void PushGPUEntries(Tegra::CommandList&& entries) override; | 23 | void PushGPUEntries(Tegra::CommandList&& entries) override; |
| 27 | void SwapBuffers( | 24 | void SwapBuffers( |
| 28 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; | 25 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; |
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index 2cfc900ed..45e43b1dc 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp | |||
| @@ -8,10 +8,12 @@ | |||
| 8 | namespace VideoCommon { | 8 | namespace VideoCommon { |
| 9 | 9 | ||
| 10 | GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) | 10 | GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) |
| 11 | : Tegra::GPU(system, renderer) {} | 11 | : GPU(system, renderer) {} |
| 12 | 12 | ||
| 13 | GPUSynch::~GPUSynch() = default; | 13 | GPUSynch::~GPUSynch() = default; |
| 14 | 14 | ||
| 15 | void GPUSynch::Start() {} | ||
| 16 | |||
| 15 | void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { | 17 | void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { |
| 16 | dma_pusher->Push(std::move(entries)); | 18 | dma_pusher->Push(std::move(entries)); |
| 17 | dma_pusher->DispatchCalls(); | 19 | dma_pusher->DispatchCalls(); |
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 766b5631c..3031fcf72 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h | |||
| @@ -18,6 +18,7 @@ public: | |||
| 18 | explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer); | 18 | explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer); |
| 19 | ~GPUSynch() override; | 19 | ~GPUSynch() override; |
| 20 | 20 | ||
| 21 | void Start() override; | ||
| 21 | void PushGPUEntries(Tegra::CommandList&& entries) override; | 22 | void PushGPUEntries(Tegra::CommandList&& entries) override; |
| 22 | void SwapBuffers( | 23 | void SwapBuffers( |
| 23 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; | 24 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; |
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index cc56cf467..c9a2077de 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -55,19 +55,24 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p | |||
| 55 | } | 55 | } |
| 56 | } | 56 | } |
| 57 | 57 | ||
| 58 | ThreadManager::ThreadManager(Core::System& system, VideoCore::RendererBase& renderer, | 58 | ThreadManager::ThreadManager(Core::System& system) : system{system} {} |
| 59 | Tegra::DmaPusher& dma_pusher) | ||
| 60 | : system{system}, thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)} { | ||
| 61 | synchronization_event = system.CoreTiming().RegisterEvent( | ||
| 62 | "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); }); | ||
| 63 | } | ||
| 64 | 59 | ||
| 65 | ThreadManager::~ThreadManager() { | 60 | ThreadManager::~ThreadManager() { |
| 61 | if (!thread.joinable()) { | ||
| 62 | return; | ||
| 63 | } | ||
| 64 | |||
| 66 | // Notify GPU thread that a shutdown is pending | 65 | // Notify GPU thread that a shutdown is pending |
| 67 | PushCommand(EndProcessingCommand()); | 66 | PushCommand(EndProcessingCommand()); |
| 68 | thread.join(); | 67 | thread.join(); |
| 69 | } | 68 | } |
| 70 | 69 | ||
| 70 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { | ||
| 71 | thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; | ||
| 72 | synchronization_event = system.CoreTiming().RegisterEvent( | ||
| 73 | "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); }); | ||
| 74 | } | ||
| 75 | |||
| 71 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { | 76 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { |
| 72 | const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; | 77 | const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; |
| 73 | const s64 synchronization_ticks{Core::Timing::usToCycles(9000)}; | 78 | const s64 synchronization_ticks{Core::Timing::usToCycles(9000)}; |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 62bcea5bb..cc14527c7 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -138,10 +138,12 @@ struct SynchState final { | |||
| 138 | /// Class used to manage the GPU thread | 138 | /// Class used to manage the GPU thread |
| 139 | class ThreadManager final { | 139 | class ThreadManager final { |
| 140 | public: | 140 | public: |
| 141 | explicit ThreadManager(Core::System& system, VideoCore::RendererBase& renderer, | 141 | explicit ThreadManager(Core::System& system); |
| 142 | Tegra::DmaPusher& dma_pusher); | ||
| 143 | ~ThreadManager(); | 142 | ~ThreadManager(); |
| 144 | 143 | ||
| 144 | /// Creates and starts the GPU thread. | ||
| 145 | void StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher); | ||
| 146 | |||
| 145 | /// Push GPU command entries to be processed | 147 | /// Push GPU command entries to be processed |
| 146 | void SubmitList(Tegra::CommandList&& entries); | 148 | void SubmitList(Tegra::CommandList&& entries); |
| 147 | 149 | ||
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 0f4e820aa..6c98c6701 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -199,7 +199,15 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const { | |||
| 199 | return {}; | 199 | return {}; |
| 200 | } | 200 | } |
| 201 | 201 | ||
| 202 | void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const { | 202 | bool MemoryManager::IsBlockContinous(const GPUVAddr start, const std::size_t size) { |
| 203 | const GPUVAddr end = start + size; | ||
| 204 | const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start)); | ||
| 205 | const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end)); | ||
| 206 | const std::size_t range = static_cast<std::size_t>(host_ptr_end - host_ptr_start); | ||
| 207 | return range == size; | ||
| 208 | } | ||
| 209 | |||
| 210 | void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const { | ||
| 203 | std::size_t remaining_size{size}; | 211 | std::size_t remaining_size{size}; |
| 204 | std::size_t page_index{src_addr >> page_bits}; | 212 | std::size_t page_index{src_addr >> page_bits}; |
| 205 | std::size_t page_offset{src_addr & page_mask}; | 213 | std::size_t page_offset{src_addr & page_mask}; |
| @@ -226,7 +234,30 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t | |||
| 226 | } | 234 | } |
| 227 | } | 235 | } |
| 228 | 236 | ||
| 229 | void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) { | 237 | void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, |
| 238 | const std::size_t size) const { | ||
| 239 | std::size_t remaining_size{size}; | ||
| 240 | std::size_t page_index{src_addr >> page_bits}; | ||
| 241 | std::size_t page_offset{src_addr & page_mask}; | ||
| 242 | |||
| 243 | while (remaining_size > 0) { | ||
| 244 | const std::size_t copy_amount{ | ||
| 245 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | ||
| 246 | const u8* page_pointer = page_table.pointers[page_index]; | ||
| 247 | if (page_pointer) { | ||
| 248 | const u8* src_ptr{page_pointer + page_offset}; | ||
| 249 | std::memcpy(dest_buffer, src_ptr, copy_amount); | ||
| 250 | } else { | ||
| 251 | std::memset(dest_buffer, 0, copy_amount); | ||
| 252 | } | ||
| 253 | page_index++; | ||
| 254 | page_offset = 0; | ||
| 255 | dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; | ||
| 256 | remaining_size -= copy_amount; | ||
| 257 | } | ||
| 258 | } | ||
| 259 | |||
| 260 | void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size) { | ||
| 230 | std::size_t remaining_size{size}; | 261 | std::size_t remaining_size{size}; |
| 231 | std::size_t page_index{dest_addr >> page_bits}; | 262 | std::size_t page_index{dest_addr >> page_bits}; |
| 232 | std::size_t page_offset{dest_addr & page_mask}; | 263 | std::size_t page_offset{dest_addr & page_mask}; |
| @@ -253,7 +284,28 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std:: | |||
| 253 | } | 284 | } |
| 254 | } | 285 | } |
| 255 | 286 | ||
| 256 | void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) { | 287 | void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, |
| 288 | const std::size_t size) { | ||
| 289 | std::size_t remaining_size{size}; | ||
| 290 | std::size_t page_index{dest_addr >> page_bits}; | ||
| 291 | std::size_t page_offset{dest_addr & page_mask}; | ||
| 292 | |||
| 293 | while (remaining_size > 0) { | ||
| 294 | const std::size_t copy_amount{ | ||
| 295 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | ||
| 296 | u8* page_pointer = page_table.pointers[page_index]; | ||
| 297 | if (page_pointer) { | ||
| 298 | u8* dest_ptr{page_pointer + page_offset}; | ||
| 299 | std::memcpy(dest_ptr, src_buffer, copy_amount); | ||
| 300 | } | ||
| 301 | page_index++; | ||
| 302 | page_offset = 0; | ||
| 303 | src_buffer = static_cast<const u8*>(src_buffer) + copy_amount; | ||
| 304 | remaining_size -= copy_amount; | ||
| 305 | } | ||
| 306 | } | ||
| 307 | |||
| 308 | void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { | ||
| 257 | std::size_t remaining_size{size}; | 309 | std::size_t remaining_size{size}; |
| 258 | std::size_t page_index{src_addr >> page_bits}; | 310 | std::size_t page_index{src_addr >> page_bits}; |
| 259 | std::size_t page_offset{src_addr & page_mask}; | 311 | std::size_t page_offset{src_addr & page_mask}; |
| @@ -281,6 +333,12 @@ void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t | |||
| 281 | } | 333 | } |
| 282 | } | 334 | } |
| 283 | 335 | ||
| 336 | void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { | ||
| 337 | std::vector<u8> tmp_buffer(size); | ||
| 338 | ReadBlockUnsafe(src_addr, tmp_buffer.data(), size); | ||
| 339 | WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size); | ||
| 340 | } | ||
| 341 | |||
| 284 | void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, | 342 | void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, |
| 285 | VAddr backing_addr) { | 343 | VAddr backing_addr) { |
| 286 | LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, | 344 | LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, |
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 647cbf93a..e4f0c4bd6 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -65,9 +65,32 @@ public: | |||
| 65 | u8* GetPointer(GPUVAddr addr); | 65 | u8* GetPointer(GPUVAddr addr); |
| 66 | const u8* GetPointer(GPUVAddr addr) const; | 66 | const u8* GetPointer(GPUVAddr addr) const; |
| 67 | 67 | ||
| 68 | void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const; | 68 | // Returns true if the block is continous in host memory, false otherwise |
| 69 | void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); | 69 | bool IsBlockContinous(const GPUVAddr start, const std::size_t size); |
| 70 | void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); | 70 | |
| 71 | /** | ||
| 72 | * ReadBlock and WriteBlock are full read and write operations over virtual | ||
| 73 | * GPU Memory. It's important to use these when GPU memory may not be continous | ||
| 74 | * in the Host Memory counterpart. Note: This functions cause Host GPU Memory | ||
| 75 | * Flushes and Invalidations, respectively to each operation. | ||
| 76 | */ | ||
| 77 | void ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const; | ||
| 78 | void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size); | ||
| 79 | void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size); | ||
| 80 | |||
| 81 | /** | ||
| 82 | * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and | ||
| 83 | * WriteBlock respectively. In this versions, no flushing or invalidation is actually | ||
| 84 | * done and their performance is similar to a memcpy. This functions can be used | ||
| 85 | * on either of this 2 scenarios instead of their safe counterpart: | ||
| 86 | * - Memory which is sure to never be represented in the Host GPU. | ||
| 87 | * - Memory Managed by a Cache Manager. Example: Texture Flushing should use | ||
| 88 | * WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture | ||
| 89 | * being flushed. | ||
| 90 | */ | ||
| 91 | void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const; | ||
| 92 | void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size); | ||
| 93 | void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size); | ||
| 71 | 94 | ||
| 72 | private: | 95 | private: |
| 73 | using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>; | 96 | using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>; |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp new file mode 100644 index 000000000..b6d9e0ddb --- /dev/null +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -0,0 +1,45 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstddef> | ||
| 6 | #include <glad/glad.h> | ||
| 7 | |||
| 8 | #include "common/logging/log.h" | ||
| 9 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 10 | |||
| 11 | namespace OpenGL { | ||
| 12 | |||
| 13 | namespace { | ||
| 14 | template <typename T> | ||
| 15 | T GetInteger(GLenum pname) { | ||
| 16 | GLint temporary; | ||
| 17 | glGetIntegerv(pname, &temporary); | ||
| 18 | return static_cast<T>(temporary); | ||
| 19 | } | ||
| 20 | } // Anonymous namespace | ||
| 21 | |||
| 22 | Device::Device() { | ||
| 23 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | ||
| 24 | has_variable_aoffi = TestVariableAoffi(); | ||
| 25 | } | ||
| 26 | |||
| 27 | bool Device::TestVariableAoffi() { | ||
| 28 | const GLchar* AOFFI_TEST = R"(#version 430 core | ||
| 29 | uniform sampler2D tex; | ||
| 30 | uniform ivec2 variable_offset; | ||
| 31 | void main() { | ||
| 32 | gl_Position = textureOffset(tex, vec2(0), variable_offset); | ||
| 33 | } | ||
| 34 | )"; | ||
| 35 | const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &AOFFI_TEST)}; | ||
| 36 | GLint link_status{}; | ||
| 37 | glGetProgramiv(shader, GL_LINK_STATUS, &link_status); | ||
| 38 | glDeleteProgram(shader); | ||
| 39 | |||
| 40 | const bool supported{link_status == GL_TRUE}; | ||
| 41 | LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", supported); | ||
| 42 | return supported; | ||
| 43 | } | ||
| 44 | |||
| 45 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h new file mode 100644 index 000000000..78ff5ee58 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <cstddef> | ||
| 8 | |||
| 9 | namespace OpenGL { | ||
| 10 | |||
| 11 | class Device { | ||
| 12 | public: | ||
| 13 | Device(); | ||
| 14 | |||
| 15 | std::size_t GetUniformBufferAlignment() const { | ||
| 16 | return uniform_buffer_alignment; | ||
| 17 | } | ||
| 18 | |||
| 19 | bool HasVariableAoffi() const { | ||
| 20 | return has_variable_aoffi; | ||
| 21 | } | ||
| 22 | |||
| 23 | private: | ||
| 24 | static bool TestVariableAoffi(); | ||
| 25 | |||
| 26 | std::size_t uniform_buffer_alignment{}; | ||
| 27 | bool has_variable_aoffi{}; | ||
| 28 | }; | ||
| 29 | |||
| 30 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 6034dc489..9a088a503 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -99,7 +99,7 @@ struct FramebufferCacheKey { | |||
| 99 | }; | 99 | }; |
| 100 | 100 | ||
| 101 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info) | 101 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info) |
| 102 | : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, system{system}, | 102 | : res_cache{*this}, shader_cache{*this, system, device}, global_cache{*this}, system{system}, |
| 103 | screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { | 103 | screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { |
| 104 | OpenGLState::ApplyDefaultState(); | 104 | OpenGLState::ApplyDefaultState(); |
| 105 | 105 | ||
| @@ -107,8 +107,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info) | |||
| 107 | state.draw.shader_program = 0; | 107 | state.draw.shader_program = 0; |
| 108 | state.Apply(); | 108 | state.Apply(); |
| 109 | 109 | ||
| 110 | glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); | ||
| 111 | |||
| 112 | LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); | 110 | LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); |
| 113 | CheckExtensions(); | 111 | CheckExtensions(); |
| 114 | } | 112 | } |
| @@ -315,8 +313,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 315 | 313 | ||
| 316 | GLShader::MaxwellUniformData ubo{}; | 314 | GLShader::MaxwellUniformData ubo{}; |
| 317 | ubo.SetFromRegs(gpu, stage); | 315 | ubo.SetFromRegs(gpu, stage); |
| 318 | const GLintptr offset = buffer_cache.UploadHostMemory( | 316 | const GLintptr offset = |
| 319 | &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment)); | 317 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); |
| 320 | 318 | ||
| 321 | // Bind the emulation info buffer | 319 | // Bind the emulation info buffer |
| 322 | bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, | 320 | bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, |
| @@ -700,23 +698,24 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 700 | // Add space for index buffer (keeping in mind non-core primitives) | 698 | // Add space for index buffer (keeping in mind non-core primitives) |
| 701 | switch (regs.draw.topology) { | 699 | switch (regs.draw.topology) { |
| 702 | case Maxwell::PrimitiveTopology::Quads: | 700 | case Maxwell::PrimitiveTopology::Quads: |
| 703 | buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + | 701 | buffer_size = Common::AlignUp(buffer_size, 4) + |
| 704 | primitive_assembler.CalculateQuadSize(regs.vertex_buffer.count); | 702 | primitive_assembler.CalculateQuadSize(regs.vertex_buffer.count); |
| 705 | break; | 703 | break; |
| 706 | default: | 704 | default: |
| 707 | if (is_indexed) { | 705 | if (is_indexed) { |
| 708 | buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + CalculateIndexBufferSize(); | 706 | buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize(); |
| 709 | } | 707 | } |
| 710 | break; | 708 | break; |
| 711 | } | 709 | } |
| 712 | 710 | ||
| 713 | // Uniform space for the 5 shader stages | 711 | // Uniform space for the 5 shader stages |
| 714 | buffer_size = | 712 | buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + |
| 715 | Common::AlignUp<std::size_t>(buffer_size, 4) + | 713 | (sizeof(GLShader::MaxwellUniformData) + device.GetUniformBufferAlignment()) * |
| 716 | (sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage; | 714 | Maxwell::MaxShaderStage; |
| 717 | 715 | ||
| 718 | // Add space for at least 18 constant buffers | 716 | // Add space for at least 18 constant buffers |
| 719 | buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); | 717 | buffer_size += |
| 718 | Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); | ||
| 720 | 719 | ||
| 721 | const bool invalidate = buffer_cache.Map(buffer_size); | 720 | const bool invalidate = buffer_cache.Map(buffer_size); |
| 722 | if (invalidate) { | 721 | if (invalidate) { |
| @@ -848,8 +847,8 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader | |||
| 848 | size = Common::AlignUp(size, sizeof(GLvec4)); | 847 | size = Common::AlignUp(size, sizeof(GLvec4)); |
| 849 | ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); | 848 | ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); |
| 850 | 849 | ||
| 851 | const GLintptr const_buffer_offset = buffer_cache.UploadMemory( | 850 | const GLintptr const_buffer_offset = |
| 852 | buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment)); | 851 | buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); |
| 853 | 852 | ||
| 854 | bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), const_buffer_offset, size); | 853 | bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), const_buffer_offset, size); |
| 855 | } | 854 | } |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index a0e056142..71b9c5ead 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include "video_core/rasterizer_cache.h" | 21 | #include "video_core/rasterizer_cache.h" |
| 22 | #include "video_core/rasterizer_interface.h" | 22 | #include "video_core/rasterizer_interface.h" |
| 23 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 23 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 24 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 24 | #include "video_core/renderer_opengl/gl_global_cache.h" | 25 | #include "video_core/renderer_opengl/gl_global_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_primitive_assembler.h" | 26 | #include "video_core/renderer_opengl/gl_primitive_assembler.h" |
| 26 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | 27 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" |
| @@ -172,6 +173,7 @@ private: | |||
| 172 | /// but are needed for correct emulation | 173 | /// but are needed for correct emulation |
| 173 | void CheckExtensions(); | 174 | void CheckExtensions(); |
| 174 | 175 | ||
| 176 | const Device device; | ||
| 175 | OpenGLState state; | 177 | OpenGLState state; |
| 176 | 178 | ||
| 177 | RasterizerCacheOpenGL res_cache; | 179 | RasterizerCacheOpenGL res_cache; |
| @@ -180,7 +182,6 @@ private: | |||
| 180 | SamplerCacheOpenGL sampler_cache; | 182 | SamplerCacheOpenGL sampler_cache; |
| 181 | 183 | ||
| 182 | Core::System& system; | 184 | Core::System& system; |
| 183 | |||
| 184 | ScreenInfo& screen_info; | 185 | ScreenInfo& screen_info; |
| 185 | 186 | ||
| 186 | std::unique_ptr<GLShader::ProgramManager> shader_program_manager; | 187 | std::unique_ptr<GLShader::ProgramManager> shader_program_manager; |
| @@ -196,7 +197,6 @@ private: | |||
| 196 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | 197 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; |
| 197 | OGLBufferCache buffer_cache; | 198 | OGLBufferCache buffer_cache; |
| 198 | PrimitiveAssembler primitive_assembler{buffer_cache}; | 199 | PrimitiveAssembler primitive_assembler{buffer_cache}; |
| 199 | GLint uniform_buffer_alignment; | ||
| 200 | 200 | ||
| 201 | BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; | 201 | BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; |
| 202 | BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; | 202 | BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 7a68b8738..5a25f5b37 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -640,13 +640,16 @@ void CachedSurface::LoadGLBuffer() { | |||
| 640 | SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i); | 640 | SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i); |
| 641 | } else { | 641 | } else { |
| 642 | const u32 bpp = params.GetFormatBpp() / 8; | 642 | const u32 bpp = params.GetFormatBpp() / 8; |
| 643 | const u32 copy_size = params.width * bpp; | 643 | const u32 copy_size = (params.width * bpp + GetDefaultBlockWidth(params.pixel_format) - 1) / |
| 644 | GetDefaultBlockWidth(params.pixel_format); | ||
| 644 | if (params.pitch == copy_size) { | 645 | if (params.pitch == copy_size) { |
| 645 | std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl); | 646 | std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl); |
| 646 | } else { | 647 | } else { |
| 648 | const u32 height = (params.height + GetDefaultBlockHeight(params.pixel_format) - 1) / | ||
| 649 | GetDefaultBlockHeight(params.pixel_format); | ||
| 647 | const u8* start{params.host_ptr}; | 650 | const u8* start{params.host_ptr}; |
| 648 | u8* write_to = gl_buffer[0].data(); | 651 | u8* write_to = gl_buffer[0].data(); |
| 649 | for (u32 h = params.height; h > 0; h--) { | 652 | for (u32 h = height; h > 0; h--) { |
| 650 | std::memcpy(write_to, start, copy_size); | 653 | std::memcpy(write_to, start, copy_size); |
| 651 | start += params.pitch; | 654 | start += params.pitch; |
| 652 | write_to += copy_size; | 655 | write_to += copy_size; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 99f67494c..2a81b1169 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -38,13 +38,15 @@ GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) { | |||
| 38 | } | 38 | } |
| 39 | 39 | ||
| 40 | /// Gets the shader program code from memory for the specified address | 40 | /// Gets the shader program code from memory for the specified address |
| 41 | ProgramCode GetShaderCode(const u8* host_ptr) { | 41 | ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, |
| 42 | const u8* host_ptr) { | ||
| 42 | ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); | 43 | ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); |
| 43 | ASSERT_OR_EXECUTE(host_ptr != nullptr, { | 44 | ASSERT_OR_EXECUTE(host_ptr != nullptr, { |
| 44 | std::fill(program_code.begin(), program_code.end(), 0); | 45 | std::fill(program_code.begin(), program_code.end(), 0); |
| 45 | return program_code; | 46 | return program_code; |
| 46 | }); | 47 | }); |
| 47 | std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64)); | 48 | memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), |
| 49 | program_code.size() * sizeof(u64)); | ||
| 48 | return program_code; | 50 | return program_code; |
| 49 | } | 51 | } |
| 50 | 52 | ||
| @@ -134,8 +136,8 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& | |||
| 134 | } | 136 | } |
| 135 | 137 | ||
| 136 | /// Creates an unspecialized program from code streams | 138 | /// Creates an unspecialized program from code streams |
| 137 | GLShader::ProgramResult CreateProgram(Maxwell::ShaderProgram program_type, ProgramCode program_code, | 139 | GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type, |
| 138 | ProgramCode program_code_b) { | 140 | ProgramCode program_code, ProgramCode program_code_b) { |
| 139 | GLShader::ShaderSetup setup(program_code); | 141 | GLShader::ShaderSetup setup(program_code); |
| 140 | if (program_type == Maxwell::ShaderProgram::VertexA) { | 142 | if (program_type == Maxwell::ShaderProgram::VertexA) { |
| 141 | // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. | 143 | // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. |
| @@ -149,11 +151,11 @@ GLShader::ProgramResult CreateProgram(Maxwell::ShaderProgram program_type, Progr | |||
| 149 | switch (program_type) { | 151 | switch (program_type) { |
| 150 | case Maxwell::ShaderProgram::VertexA: | 152 | case Maxwell::ShaderProgram::VertexA: |
| 151 | case Maxwell::ShaderProgram::VertexB: | 153 | case Maxwell::ShaderProgram::VertexB: |
| 152 | return GLShader::GenerateVertexShader(setup); | 154 | return GLShader::GenerateVertexShader(device, setup); |
| 153 | case Maxwell::ShaderProgram::Geometry: | 155 | case Maxwell::ShaderProgram::Geometry: |
| 154 | return GLShader::GenerateGeometryShader(setup); | 156 | return GLShader::GenerateGeometryShader(device, setup); |
| 155 | case Maxwell::ShaderProgram::Fragment: | 157 | case Maxwell::ShaderProgram::Fragment: |
| 156 | return GLShader::GenerateFragmentShader(setup); | 158 | return GLShader::GenerateFragmentShader(device, setup); |
| 157 | default: | 159 | default: |
| 158 | LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); | 160 | LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); |
| 159 | UNREACHABLE(); | 161 | UNREACHABLE(); |
| @@ -212,22 +214,20 @@ std::set<GLenum> GetSupportedFormats() { | |||
| 212 | return supported_formats; | 214 | return supported_formats; |
| 213 | } | 215 | } |
| 214 | 216 | ||
| 215 | } // namespace | 217 | } // Anonymous namespace |
| 216 | 218 | ||
| 217 | CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier, | 219 | CachedShader::CachedShader(const Device& device, VAddr cpu_addr, u64 unique_identifier, |
| 218 | Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, | 220 | Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, |
| 219 | const PrecompiledPrograms& precompiled_programs, | 221 | const PrecompiledPrograms& precompiled_programs, |
| 220 | ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr) | 222 | ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr) |
| 221 | : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr}, | 223 | : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr}, |
| 222 | unique_identifier{unique_identifier}, program_type{program_type}, disk_cache{disk_cache}, | 224 | unique_identifier{unique_identifier}, program_type{program_type}, disk_cache{disk_cache}, |
| 223 | precompiled_programs{precompiled_programs} { | 225 | precompiled_programs{precompiled_programs} { |
| 224 | 226 | const std::size_t code_size{CalculateProgramSize(program_code)}; | |
| 225 | const std::size_t code_size = CalculateProgramSize(program_code); | 227 | const std::size_t code_size_b{program_code_b.empty() ? 0 |
| 226 | const std::size_t code_size_b = | 228 | : CalculateProgramSize(program_code_b)}; |
| 227 | program_code_b.empty() ? 0 : CalculateProgramSize(program_code_b); | 229 | GLShader::ProgramResult program_result{ |
| 228 | 230 | CreateProgram(device, program_type, program_code, program_code_b)}; | |
| 229 | GLShader::ProgramResult program_result = | ||
| 230 | CreateProgram(program_type, program_code, program_code_b); | ||
| 231 | if (program_result.first.empty()) { | 231 | if (program_result.first.empty()) { |
| 232 | // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now | 232 | // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now |
| 233 | return; | 233 | return; |
| @@ -251,7 +251,6 @@ CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier, | |||
| 251 | : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier}, | 251 | : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier}, |
| 252 | program_type{program_type}, disk_cache{disk_cache}, precompiled_programs{ | 252 | program_type{program_type}, disk_cache{disk_cache}, precompiled_programs{ |
| 253 | precompiled_programs} { | 253 | precompiled_programs} { |
| 254 | |||
| 255 | code = std::move(result.first); | 254 | code = std::move(result.first); |
| 256 | entries = result.second; | 255 | entries = result.second; |
| 257 | shader_length = entries.shader_length; | 256 | shader_length = entries.shader_length; |
| @@ -344,8 +343,9 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode, | |||
| 344 | return {unique_identifier, base_bindings, primitive_mode}; | 343 | return {unique_identifier, base_bindings, primitive_mode}; |
| 345 | } | 344 | } |
| 346 | 345 | ||
| 347 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system) | 346 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, |
| 348 | : RasterizerCache{rasterizer}, disk_cache{system} {} | 347 | const Device& device) |
| 348 | : RasterizerCache{rasterizer}, disk_cache{system}, device{device} {} | ||
| 349 | 349 | ||
| 350 | void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | 350 | void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, |
| 351 | const VideoCore::DiskResourceLoadCallback& callback) { | 351 | const VideoCore::DiskResourceLoadCallback& callback) { |
| @@ -439,17 +439,18 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia | |||
| 439 | const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) { | 439 | const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) { |
| 440 | std::unordered_map<u64, UnspecializedShader> unspecialized; | 440 | std::unordered_map<u64, UnspecializedShader> unspecialized; |
| 441 | 441 | ||
| 442 | if (callback) | 442 | if (callback) { |
| 443 | callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); | 443 | callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); |
| 444 | } | ||
| 444 | 445 | ||
| 445 | for (std::size_t i = 0; i < raws.size(); ++i) { | 446 | for (std::size_t i = 0; i < raws.size(); ++i) { |
| 446 | if (stop_loading) | 447 | if (stop_loading) { |
| 447 | return {}; | 448 | return {}; |
| 448 | 449 | } | |
| 449 | const auto& raw{raws[i]}; | 450 | const auto& raw{raws[i]}; |
| 450 | const u64 unique_identifier = raw.GetUniqueIdentifier(); | 451 | const u64 unique_identifier{raw.GetUniqueIdentifier()}; |
| 451 | const u64 calculated_hash = | 452 | const u64 calculated_hash{ |
| 452 | GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB()); | 453 | GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())}; |
| 453 | if (unique_identifier != calculated_hash) { | 454 | if (unique_identifier != calculated_hash) { |
| 454 | LOG_ERROR( | 455 | LOG_ERROR( |
| 455 | Render_OpenGL, | 456 | Render_OpenGL, |
| @@ -466,8 +467,8 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia | |||
| 466 | result = {stored_decompiled.code, stored_decompiled.entries}; | 467 | result = {stored_decompiled.code, stored_decompiled.entries}; |
| 467 | } else { | 468 | } else { |
| 468 | // Otherwise decompile the shader at boot and save the result to the decompiled file | 469 | // Otherwise decompile the shader at boot and save the result to the decompiled file |
| 469 | result = | 470 | result = CreateProgram(device, raw.GetProgramType(), raw.GetProgramCode(), |
| 470 | CreateProgram(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB()); | 471 | raw.GetProgramCodeB()); |
| 471 | disk_cache.SaveDecompiled(unique_identifier, result.first, result.second); | 472 | disk_cache.SaveDecompiled(unique_identifier, result.first, result.second); |
| 472 | } | 473 | } |
| 473 | 474 | ||
| @@ -477,8 +478,9 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia | |||
| 477 | {raw.GetUniqueIdentifier(), | 478 | {raw.GetUniqueIdentifier(), |
| 478 | {std::move(result.first), std::move(result.second), raw.GetProgramType()}}); | 479 | {std::move(result.first), std::move(result.second), raw.GetProgramType()}}); |
| 479 | 480 | ||
| 480 | if (callback) | 481 | if (callback) { |
| 481 | callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); | 482 | callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); |
| 483 | } | ||
| 482 | } | 484 | } |
| 483 | return unspecialized; | 485 | return unspecialized; |
| 484 | } | 486 | } |
| @@ -497,11 +499,12 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 497 | 499 | ||
| 498 | if (!shader) { | 500 | if (!shader) { |
| 499 | // No shader found - create a new one | 501 | // No shader found - create a new one |
| 500 | ProgramCode program_code{GetShaderCode(host_ptr)}; | 502 | ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; |
| 501 | ProgramCode program_code_b; | 503 | ProgramCode program_code_b; |
| 502 | if (program == Maxwell::ShaderProgram::VertexA) { | 504 | if (program == Maxwell::ShaderProgram::VertexA) { |
| 503 | program_code_b = GetShaderCode( | 505 | const GPUVAddr program_addr_b{GetShaderAddress(Maxwell::ShaderProgram::VertexB)}; |
| 504 | memory_manager.GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB))); | 506 | program_code_b = GetShaderCode(memory_manager, program_addr_b, |
| 507 | memory_manager.GetPointer(program_addr_b)); | ||
| 505 | } | 508 | } |
| 506 | const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); | 509 | const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); |
| 507 | const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; | 510 | const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; |
| @@ -512,7 +515,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 512 | precompiled_programs, found->second, host_ptr); | 515 | precompiled_programs, found->second, host_ptr); |
| 513 | } else { | 516 | } else { |
| 514 | shader = std::make_shared<CachedShader>( | 517 | shader = std::make_shared<CachedShader>( |
| 515 | cpu_addr, unique_identifier, program, disk_cache, precompiled_programs, | 518 | device, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs, |
| 516 | std::move(program_code), std::move(program_code_b), host_ptr); | 519 | std::move(program_code), std::move(program_code_b), host_ptr); |
| 517 | } | 520 | } |
| 518 | Register(shader); | 521 | Register(shader); |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 0cf8e0b3d..a332087f8 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -27,6 +27,7 @@ class System; | |||
| 27 | namespace OpenGL { | 27 | namespace OpenGL { |
| 28 | 28 | ||
| 29 | class CachedShader; | 29 | class CachedShader; |
| 30 | class Device; | ||
| 30 | class RasterizerOpenGL; | 31 | class RasterizerOpenGL; |
| 31 | struct UnspecializedShader; | 32 | struct UnspecializedShader; |
| 32 | 33 | ||
| @@ -38,7 +39,7 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>; | |||
| 38 | 39 | ||
| 39 | class CachedShader final : public RasterizerCacheObject { | 40 | class CachedShader final : public RasterizerCacheObject { |
| 40 | public: | 41 | public: |
| 41 | explicit CachedShader(VAddr cpu_addr, u64 unique_identifier, | 42 | explicit CachedShader(const Device& device, VAddr cpu_addr, u64 unique_identifier, |
| 42 | Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, | 43 | Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, |
| 43 | const PrecompiledPrograms& precompiled_programs, | 44 | const PrecompiledPrograms& precompiled_programs, |
| 44 | ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr); | 45 | ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr); |
| @@ -112,7 +113,8 @@ private: | |||
| 112 | 113 | ||
| 113 | class ShaderCacheOpenGL final : public RasterizerCache<Shader> { | 114 | class ShaderCacheOpenGL final : public RasterizerCache<Shader> { |
| 114 | public: | 115 | public: |
| 115 | explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system); | 116 | explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, |
| 117 | const Device& device); | ||
| 116 | 118 | ||
| 117 | /// Loads disk cache for the current game | 119 | /// Loads disk cache for the current game |
| 118 | void LoadDiskCache(const std::atomic_bool& stop_loading, | 120 | void LoadDiskCache(const std::atomic_bool& stop_loading, |
| @@ -130,6 +132,8 @@ private: | |||
| 130 | CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, | 132 | CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, |
| 131 | const std::set<GLenum>& supported_formats); | 133 | const std::set<GLenum>& supported_formats); |
| 132 | 134 | ||
| 135 | const Device& device; | ||
| 136 | |||
| 133 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | 137 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; |
| 134 | 138 | ||
| 135 | ShaderDiskCacheOpenGL disk_cache; | 139 | ShaderDiskCacheOpenGL disk_cache; |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 445048daf..ef1a1995f 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include "common/assert.h" | 15 | #include "common/assert.h" |
| 16 | #include "common/common_types.h" | 16 | #include "common/common_types.h" |
| 17 | #include "video_core/engines/maxwell_3d.h" | 17 | #include "video_core/engines/maxwell_3d.h" |
| 18 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 19 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 19 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 20 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 20 | #include "video_core/shader/shader_ir.h" | 21 | #include "video_core/shader/shader_ir.h" |
| @@ -119,14 +120,10 @@ std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { | |||
| 119 | 120 | ||
| 120 | /// Returns true if an object has to be treated as precise | 121 | /// Returns true if an object has to be treated as precise |
| 121 | bool IsPrecise(Operation operand) { | 122 | bool IsPrecise(Operation operand) { |
| 122 | const auto& meta = operand.GetMeta(); | 123 | const auto& meta{operand.GetMeta()}; |
| 123 | |||
| 124 | if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { | 124 | if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { |
| 125 | return arithmetic->precise; | 125 | return arithmetic->precise; |
| 126 | } | 126 | } |
| 127 | if (const auto half_arithmetic = std::get_if<MetaHalfArithmetic>(&meta)) { | ||
| 128 | return half_arithmetic->precise; | ||
| 129 | } | ||
| 130 | return false; | 127 | return false; |
| 131 | } | 128 | } |
| 132 | 129 | ||
| @@ -139,8 +136,9 @@ bool IsPrecise(Node node) { | |||
| 139 | 136 | ||
| 140 | class GLSLDecompiler final { | 137 | class GLSLDecompiler final { |
| 141 | public: | 138 | public: |
| 142 | explicit GLSLDecompiler(const ShaderIR& ir, ShaderStage stage, std::string suffix) | 139 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, |
| 143 | : ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} | 140 | std::string suffix) |
| 141 | : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} | ||
| 144 | 142 | ||
| 145 | void Decompile() { | 143 | void Decompile() { |
| 146 | DeclareVertex(); | 144 | DeclareVertex(); |
| @@ -627,28 +625,7 @@ private: | |||
| 627 | } | 625 | } |
| 628 | 626 | ||
| 629 | std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { | 627 | std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { |
| 630 | std::string value = VisitOperand(operation, operand_index); | 628 | return CastOperand(VisitOperand(operation, operand_index), type); |
| 631 | switch (type) { | ||
| 632 | case Type::HalfFloat: { | ||
| 633 | const auto half_meta = std::get_if<MetaHalfArithmetic>(&operation.GetMeta()); | ||
| 634 | if (!half_meta) { | ||
| 635 | value = "toHalf2(" + value + ')'; | ||
| 636 | } | ||
| 637 | |||
| 638 | switch (half_meta->types.at(operand_index)) { | ||
| 639 | case Tegra::Shader::HalfType::H0_H1: | ||
| 640 | return "toHalf2(" + value + ')'; | ||
| 641 | case Tegra::Shader::HalfType::F32: | ||
| 642 | return "vec2(" + value + ')'; | ||
| 643 | case Tegra::Shader::HalfType::H0_H0: | ||
| 644 | return "vec2(toHalf2(" + value + ")[0])"; | ||
| 645 | case Tegra::Shader::HalfType::H1_H1: | ||
| 646 | return "vec2(toHalf2(" + value + ")[1])"; | ||
| 647 | } | ||
| 648 | } | ||
| 649 | default: | ||
| 650 | return CastOperand(value, type); | ||
| 651 | } | ||
| 652 | } | 629 | } |
| 653 | 630 | ||
| 654 | std::string CastOperand(const std::string& value, Type type) const { | 631 | std::string CastOperand(const std::string& value, Type type) const { |
| @@ -662,9 +639,7 @@ private: | |||
| 662 | case Type::Uint: | 639 | case Type::Uint: |
| 663 | return "ftou(" + value + ')'; | 640 | return "ftou(" + value + ')'; |
| 664 | case Type::HalfFloat: | 641 | case Type::HalfFloat: |
| 665 | // Can't be handled as a stand-alone value | 642 | return "toHalf2(" + value + ')'; |
| 666 | UNREACHABLE(); | ||
| 667 | return value; | ||
| 668 | } | 643 | } |
| 669 | UNREACHABLE(); | 644 | UNREACHABLE(); |
| 670 | return value; | 645 | return value; |
| @@ -829,8 +804,12 @@ private: | |||
| 829 | // Inline the string as an immediate integer in GLSL (AOFFI arguments are required | 804 | // Inline the string as an immediate integer in GLSL (AOFFI arguments are required |
| 830 | // to be constant by the standard). | 805 | // to be constant by the standard). |
| 831 | expr += std::to_string(static_cast<s32>(immediate->GetValue())); | 806 | expr += std::to_string(static_cast<s32>(immediate->GetValue())); |
| 832 | } else { | 807 | } else if (device.HasVariableAoffi()) { |
| 808 | // Avoid using variable AOFFI on unsupported devices. | ||
| 833 | expr += "ftoi(" + Visit(operand) + ')'; | 809 | expr += "ftoi(" + Visit(operand) + ')'; |
| 810 | } else { | ||
| 811 | // Insert 0 on devices not supporting variable AOFFI. | ||
| 812 | expr += '0'; | ||
| 834 | } | 813 | } |
| 835 | if (index + 1 < aoffi.size()) { | 814 | if (index + 1 < aoffi.size()) { |
| 836 | expr += ", "; | 815 | expr += ", "; |
| @@ -1083,13 +1062,40 @@ private: | |||
| 1083 | return BitwiseCastResult(value, Type::HalfFloat); | 1062 | return BitwiseCastResult(value, Type::HalfFloat); |
| 1084 | } | 1063 | } |
| 1085 | 1064 | ||
| 1065 | std::string HClamp(Operation operation) { | ||
| 1066 | const std::string value = VisitOperand(operation, 0, Type::HalfFloat); | ||
| 1067 | const std::string min = VisitOperand(operation, 1, Type::Float); | ||
| 1068 | const std::string max = VisitOperand(operation, 2, Type::Float); | ||
| 1069 | const std::string clamped = "clamp(" + value + ", vec2(" + min + "), vec2(" + max + "))"; | ||
| 1070 | return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); | ||
| 1071 | } | ||
| 1072 | |||
| 1073 | std::string HUnpack(Operation operation) { | ||
| 1074 | const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)}; | ||
| 1075 | const auto value = [&]() -> std::string { | ||
| 1076 | switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { | ||
| 1077 | case Tegra::Shader::HalfType::H0_H1: | ||
| 1078 | return operand; | ||
| 1079 | case Tegra::Shader::HalfType::F32: | ||
| 1080 | return "vec2(fromHalf2(" + operand + "))"; | ||
| 1081 | case Tegra::Shader::HalfType::H0_H0: | ||
| 1082 | return "vec2(" + operand + "[0])"; | ||
| 1083 | case Tegra::Shader::HalfType::H1_H1: | ||
| 1084 | return "vec2(" + operand + "[1])"; | ||
| 1085 | } | ||
| 1086 | UNREACHABLE(); | ||
| 1087 | return "0"; | ||
| 1088 | }(); | ||
| 1089 | return "fromHalf2(" + value + ')'; | ||
| 1090 | } | ||
| 1091 | |||
| 1086 | std::string HMergeF32(Operation operation) { | 1092 | std::string HMergeF32(Operation operation) { |
| 1087 | return "float(toHalf2(" + Visit(operation[0]) + ")[0])"; | 1093 | return "float(toHalf2(" + Visit(operation[0]) + ")[0])"; |
| 1088 | } | 1094 | } |
| 1089 | 1095 | ||
| 1090 | std::string HMergeH0(Operation operation) { | 1096 | std::string HMergeH0(Operation operation) { |
| 1091 | return "fromHalf2(vec2(toHalf2(" + Visit(operation[0]) + ")[1], toHalf2(" + | 1097 | return "fromHalf2(vec2(toHalf2(" + Visit(operation[1]) + ")[0], toHalf2(" + |
| 1092 | Visit(operation[1]) + ")[0]))"; | 1098 | Visit(operation[0]) + ")[1]))"; |
| 1093 | } | 1099 | } |
| 1094 | 1100 | ||
| 1095 | std::string HMergeH1(Operation operation) { | 1101 | std::string HMergeH1(Operation operation) { |
| @@ -1189,34 +1195,46 @@ private: | |||
| 1189 | return GenerateUnary(operation, "any", Type::Bool, Type::Bool2); | 1195 | return GenerateUnary(operation, "any", Type::Bool, Type::Bool2); |
| 1190 | } | 1196 | } |
| 1191 | 1197 | ||
| 1198 | template <bool with_nan> | ||
| 1199 | std::string GenerateHalfComparison(Operation operation, std::string compare_op) { | ||
| 1200 | std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, | ||
| 1201 | Type::HalfFloat, Type::HalfFloat)}; | ||
| 1202 | if constexpr (!with_nan) { | ||
| 1203 | return comparison; | ||
| 1204 | } | ||
| 1205 | return "halfFloatNanComparison(" + comparison + ", " + | ||
| 1206 | VisitOperand(operation, 0, Type::HalfFloat) + ", " + | ||
| 1207 | VisitOperand(operation, 1, Type::HalfFloat) + ')'; | ||
| 1208 | } | ||
| 1209 | |||
| 1210 | template <bool with_nan> | ||
| 1192 | std::string Logical2HLessThan(Operation operation) { | 1211 | std::string Logical2HLessThan(Operation operation) { |
| 1193 | return GenerateBinaryCall(operation, "lessThan", Type::Bool2, Type::HalfFloat, | 1212 | return GenerateHalfComparison<with_nan>(operation, "lessThan"); |
| 1194 | Type::HalfFloat); | ||
| 1195 | } | 1213 | } |
| 1196 | 1214 | ||
| 1215 | template <bool with_nan> | ||
| 1197 | std::string Logical2HEqual(Operation operation) { | 1216 | std::string Logical2HEqual(Operation operation) { |
| 1198 | return GenerateBinaryCall(operation, "equal", Type::Bool2, Type::HalfFloat, | 1217 | return GenerateHalfComparison<with_nan>(operation, "equal"); |
| 1199 | Type::HalfFloat); | ||
| 1200 | } | 1218 | } |
| 1201 | 1219 | ||
| 1220 | template <bool with_nan> | ||
| 1202 | std::string Logical2HLessEqual(Operation operation) { | 1221 | std::string Logical2HLessEqual(Operation operation) { |
| 1203 | return GenerateBinaryCall(operation, "lessThanEqual", Type::Bool2, Type::HalfFloat, | 1222 | return GenerateHalfComparison<with_nan>(operation, "lessThanEqual"); |
| 1204 | Type::HalfFloat); | ||
| 1205 | } | 1223 | } |
| 1206 | 1224 | ||
| 1225 | template <bool with_nan> | ||
| 1207 | std::string Logical2HGreaterThan(Operation operation) { | 1226 | std::string Logical2HGreaterThan(Operation operation) { |
| 1208 | return GenerateBinaryCall(operation, "greaterThan", Type::Bool2, Type::HalfFloat, | 1227 | return GenerateHalfComparison<with_nan>(operation, "greaterThan"); |
| 1209 | Type::HalfFloat); | ||
| 1210 | } | 1228 | } |
| 1211 | 1229 | ||
| 1230 | template <bool with_nan> | ||
| 1212 | std::string Logical2HNotEqual(Operation operation) { | 1231 | std::string Logical2HNotEqual(Operation operation) { |
| 1213 | return GenerateBinaryCall(operation, "notEqual", Type::Bool2, Type::HalfFloat, | 1232 | return GenerateHalfComparison<with_nan>(operation, "notEqual"); |
| 1214 | Type::HalfFloat); | ||
| 1215 | } | 1233 | } |
| 1216 | 1234 | ||
| 1235 | template <bool with_nan> | ||
| 1217 | std::string Logical2HGreaterEqual(Operation operation) { | 1236 | std::string Logical2HGreaterEqual(Operation operation) { |
| 1218 | return GenerateBinaryCall(operation, "greaterThanEqual", Type::Bool2, Type::HalfFloat, | 1237 | return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual"); |
| 1219 | Type::HalfFloat); | ||
| 1220 | } | 1238 | } |
| 1221 | 1239 | ||
| 1222 | std::string Texture(Operation operation) { | 1240 | std::string Texture(Operation operation) { |
| @@ -1505,6 +1523,8 @@ private: | |||
| 1505 | &GLSLDecompiler::Fma<Type::HalfFloat>, | 1523 | &GLSLDecompiler::Fma<Type::HalfFloat>, |
| 1506 | &GLSLDecompiler::Absolute<Type::HalfFloat>, | 1524 | &GLSLDecompiler::Absolute<Type::HalfFloat>, |
| 1507 | &GLSLDecompiler::HNegate, | 1525 | &GLSLDecompiler::HNegate, |
| 1526 | &GLSLDecompiler::HClamp, | ||
| 1527 | &GLSLDecompiler::HUnpack, | ||
| 1508 | &GLSLDecompiler::HMergeF32, | 1528 | &GLSLDecompiler::HMergeF32, |
| 1509 | &GLSLDecompiler::HMergeH0, | 1529 | &GLSLDecompiler::HMergeH0, |
| 1510 | &GLSLDecompiler::HMergeH1, | 1530 | &GLSLDecompiler::HMergeH1, |
| @@ -1541,12 +1561,18 @@ private: | |||
| 1541 | &GLSLDecompiler::LogicalNotEqual<Type::Uint>, | 1561 | &GLSLDecompiler::LogicalNotEqual<Type::Uint>, |
| 1542 | &GLSLDecompiler::LogicalGreaterEqual<Type::Uint>, | 1562 | &GLSLDecompiler::LogicalGreaterEqual<Type::Uint>, |
| 1543 | 1563 | ||
| 1544 | &GLSLDecompiler::Logical2HLessThan, | 1564 | &GLSLDecompiler::Logical2HLessThan<false>, |
| 1545 | &GLSLDecompiler::Logical2HEqual, | 1565 | &GLSLDecompiler::Logical2HEqual<false>, |
| 1546 | &GLSLDecompiler::Logical2HLessEqual, | 1566 | &GLSLDecompiler::Logical2HLessEqual<false>, |
| 1547 | &GLSLDecompiler::Logical2HGreaterThan, | 1567 | &GLSLDecompiler::Logical2HGreaterThan<false>, |
| 1548 | &GLSLDecompiler::Logical2HNotEqual, | 1568 | &GLSLDecompiler::Logical2HNotEqual<false>, |
| 1549 | &GLSLDecompiler::Logical2HGreaterEqual, | 1569 | &GLSLDecompiler::Logical2HGreaterEqual<false>, |
| 1570 | &GLSLDecompiler::Logical2HLessThan<true>, | ||
| 1571 | &GLSLDecompiler::Logical2HEqual<true>, | ||
| 1572 | &GLSLDecompiler::Logical2HLessEqual<true>, | ||
| 1573 | &GLSLDecompiler::Logical2HGreaterThan<true>, | ||
| 1574 | &GLSLDecompiler::Logical2HNotEqual<true>, | ||
| 1575 | &GLSLDecompiler::Logical2HGreaterEqual<true>, | ||
| 1550 | 1576 | ||
| 1551 | &GLSLDecompiler::Texture, | 1577 | &GLSLDecompiler::Texture, |
| 1552 | &GLSLDecompiler::TextureLod, | 1578 | &GLSLDecompiler::TextureLod, |
| @@ -1625,6 +1651,7 @@ private: | |||
| 1625 | return name + '_' + std::to_string(index) + '_' + suffix; | 1651 | return name + '_' + std::to_string(index) + '_' + suffix; |
| 1626 | } | 1652 | } |
| 1627 | 1653 | ||
| 1654 | const Device& device; | ||
| 1628 | const ShaderIR& ir; | 1655 | const ShaderIR& ir; |
| 1629 | const ShaderStage stage; | 1656 | const ShaderStage stage; |
| 1630 | const std::string suffix; | 1657 | const std::string suffix; |
| @@ -1647,11 +1674,18 @@ std::string GetCommonDeclarations() { | |||
| 1647 | "}\n\n" | 1674 | "}\n\n" |
| 1648 | "vec2 toHalf2(float value) {\n" | 1675 | "vec2 toHalf2(float value) {\n" |
| 1649 | " return unpackHalf2x16(ftou(value));\n" | 1676 | " return unpackHalf2x16(ftou(value));\n" |
| 1677 | "}\n\n" | ||
| 1678 | "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {\n" | ||
| 1679 | " bvec2 is_nan1 = isnan(pair1);\n" | ||
| 1680 | " bvec2 is_nan2 = isnan(pair2);\n" | ||
| 1681 | " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " | ||
| 1682 | "is_nan2.y);\n" | ||
| 1650 | "}\n"; | 1683 | "}\n"; |
| 1651 | } | 1684 | } |
| 1652 | 1685 | ||
| 1653 | ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const std::string& suffix) { | 1686 | ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage, |
| 1654 | GLSLDecompiler decompiler(ir, stage, suffix); | 1687 | const std::string& suffix) { |
| 1688 | GLSLDecompiler decompiler(device, ir, stage, suffix); | ||
| 1655 | decompiler.Decompile(); | 1689 | decompiler.Decompile(); |
| 1656 | return {decompiler.GetResult(), decompiler.GetShaderEntries()}; | 1690 | return {decompiler.GetResult(), decompiler.GetShaderEntries()}; |
| 1657 | } | 1691 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 74032d237..c1569e737 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -12,6 +12,10 @@ | |||
| 12 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/shader/shader_ir.h" | 13 | #include "video_core/shader/shader_ir.h" |
| 14 | 14 | ||
| 15 | namespace OpenGL { | ||
| 16 | class Device; | ||
| 17 | } | ||
| 18 | |||
| 15 | namespace VideoCommon::Shader { | 19 | namespace VideoCommon::Shader { |
| 16 | class ShaderIR; | 20 | class ShaderIR; |
| 17 | } | 21 | } |
| @@ -77,7 +81,7 @@ struct ShaderEntries { | |||
| 77 | 81 | ||
| 78 | std::string GetCommonDeclarations(); | 82 | std::string GetCommonDeclarations(); |
| 79 | 83 | ||
| 80 | ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage, | 84 | ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
| 81 | const std::string& suffix); | 85 | Maxwell::ShaderStage stage, const std::string& suffix); |
| 82 | 86 | ||
| 83 | } // namespace OpenGL::GLShader | 87 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 8763d9c71..6abf948f8 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -16,7 +16,7 @@ using VideoCommon::Shader::ShaderIR; | |||
| 16 | 16 | ||
| 17 | static constexpr u32 PROGRAM_OFFSET{10}; | 17 | static constexpr u32 PROGRAM_OFFSET{10}; |
| 18 | 18 | ||
| 19 | ProgramResult GenerateVertexShader(const ShaderSetup& setup) { | 19 | ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { |
| 20 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 20 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |
| 21 | 21 | ||
| 22 | std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; | 22 | std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; |
| @@ -34,14 +34,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { | |||
| 34 | 34 | ||
| 35 | )"; | 35 | )"; |
| 36 | ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | 36 | ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); |
| 37 | ProgramResult program = Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex"); | 37 | ProgramResult program = |
| 38 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex"); | ||
| 38 | 39 | ||
| 39 | out += program.first; | 40 | out += program.first; |
| 40 | 41 | ||
| 41 | if (setup.IsDualProgram()) { | 42 | if (setup.IsDualProgram()) { |
| 42 | ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET); | 43 | ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET); |
| 43 | ProgramResult program_b = | 44 | ProgramResult program_b = |
| 44 | Decompile(program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b"); | 45 | Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b"); |
| 45 | 46 | ||
| 46 | out += program_b.first; | 47 | out += program_b.first; |
| 47 | } | 48 | } |
| @@ -57,6 +58,9 @@ void main() { | |||
| 57 | } | 58 | } |
| 58 | 59 | ||
| 59 | out += R"( | 60 | out += R"( |
| 61 | |||
| 62 | // Set Position Y direction | ||
| 63 | position.y *= utof(config_pack[2]); | ||
| 60 | // Check if the flip stage is VertexB | 64 | // Check if the flip stage is VertexB |
| 61 | // Config pack's second value is flip_stage | 65 | // Config pack's second value is flip_stage |
| 62 | if (config_pack[1] == 1) { | 66 | if (config_pack[1] == 1) { |
| @@ -75,7 +79,7 @@ void main() { | |||
| 75 | return {out, program.second}; | 79 | return {out, program.second}; |
| 76 | } | 80 | } |
| 77 | 81 | ||
| 78 | ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { | 82 | ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) { |
| 79 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 83 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |
| 80 | 84 | ||
| 81 | std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; | 85 | std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; |
| @@ -95,7 +99,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { | |||
| 95 | )"; | 99 | )"; |
| 96 | ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | 100 | ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); |
| 97 | ProgramResult program = | 101 | ProgramResult program = |
| 98 | Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); | 102 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); |
| 99 | out += program.first; | 103 | out += program.first; |
| 100 | 104 | ||
| 101 | out += R"( | 105 | out += R"( |
| @@ -106,7 +110,7 @@ void main() { | |||
| 106 | return {out, program.second}; | 110 | return {out, program.second}; |
| 107 | } | 111 | } |
| 108 | 112 | ||
| 109 | ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { | 113 | ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) { |
| 110 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 114 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |
| 111 | 115 | ||
| 112 | std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; | 116 | std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; |
| @@ -158,7 +162,7 @@ bool AlphaFunc(in float value) { | |||
| 158 | )"; | 162 | )"; |
| 159 | ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); | 163 | ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); |
| 160 | ProgramResult program = | 164 | ProgramResult program = |
| 161 | Decompile(program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment"); | 165 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment"); |
| 162 | 166 | ||
| 163 | out += program.first; | 167 | out += program.first; |
| 164 | 168 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index fad346b48..0536c8a03 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h | |||
| @@ -10,6 +10,10 @@ | |||
| 10 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 10 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 11 | #include "video_core/shader/shader_ir.h" | 11 | #include "video_core/shader/shader_ir.h" |
| 12 | 12 | ||
| 13 | namespace OpenGL { | ||
| 14 | class Device; | ||
| 15 | } | ||
| 16 | |||
| 13 | namespace OpenGL::GLShader { | 17 | namespace OpenGL::GLShader { |
| 14 | 18 | ||
| 15 | using VideoCommon::Shader::ProgramCode; | 19 | using VideoCommon::Shader::ProgramCode; |
| @@ -39,22 +43,13 @@ private: | |||
| 39 | bool has_program_b{}; | 43 | bool has_program_b{}; |
| 40 | }; | 44 | }; |
| 41 | 45 | ||
| 42 | /** | 46 | /// Generates the GLSL vertex shader program source code for the given VS program |
| 43 | * Generates the GLSL vertex shader program source code for the given VS program | 47 | ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup); |
| 44 | * @returns String of the shader source code | 48 | |
| 45 | */ | 49 | /// Generates the GLSL geometry shader program source code for the given GS program |
| 46 | ProgramResult GenerateVertexShader(const ShaderSetup& setup); | 50 | ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup); |
| 47 | 51 | ||
| 48 | /** | 52 | /// Generates the GLSL fragment shader program source code for the given FS program |
| 49 | * Generates the GLSL geometry shader program source code for the given GS program | 53 | ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); |
| 50 | * @returns String of the shader source code | ||
| 51 | */ | ||
| 52 | ProgramResult GenerateGeometryShader(const ShaderSetup& setup); | ||
| 53 | |||
| 54 | /** | ||
| 55 | * Generates the GLSL fragment shader program source code for the given FS program | ||
| 56 | * @returns String of the shader source code | ||
| 57 | */ | ||
| 58 | ProgramResult GenerateFragmentShader(const ShaderSetup& setup); | ||
| 59 | 54 | ||
| 60 | } // namespace OpenGL::GLShader | 55 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 25500f9a3..23d9b10db 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -76,14 +76,10 @@ constexpr u32 GetGenericAttributeLocation(Attribute::Index attribute) { | |||
| 76 | 76 | ||
| 77 | /// Returns true if an object has to be treated as precise | 77 | /// Returns true if an object has to be treated as precise |
| 78 | bool IsPrecise(Operation operand) { | 78 | bool IsPrecise(Operation operand) { |
| 79 | const auto& meta = operand.GetMeta(); | 79 | const auto& meta{operand.GetMeta()}; |
| 80 | |||
| 81 | if (std::holds_alternative<MetaArithmetic>(meta)) { | 80 | if (std::holds_alternative<MetaArithmetic>(meta)) { |
| 82 | return std::get<MetaArithmetic>(meta).precise; | 81 | return std::get<MetaArithmetic>(meta).precise; |
| 83 | } | 82 | } |
| 84 | if (std::holds_alternative<MetaHalfArithmetic>(meta)) { | ||
| 85 | return std::get<MetaHalfArithmetic>(meta).precise; | ||
| 86 | } | ||
| 87 | return false; | 83 | return false; |
| 88 | } | 84 | } |
| 89 | 85 | ||
| @@ -746,6 +742,16 @@ private: | |||
| 746 | return {}; | 742 | return {}; |
| 747 | } | 743 | } |
| 748 | 744 | ||
| 745 | Id HClamp(Operation operation) { | ||
| 746 | UNIMPLEMENTED(); | ||
| 747 | return {}; | ||
| 748 | } | ||
| 749 | |||
| 750 | Id HUnpack(Operation operation) { | ||
| 751 | UNIMPLEMENTED(); | ||
| 752 | return {}; | ||
| 753 | } | ||
| 754 | |||
| 749 | Id HMergeF32(Operation operation) { | 755 | Id HMergeF32(Operation operation) { |
| 750 | UNIMPLEMENTED(); | 756 | UNIMPLEMENTED(); |
| 751 | return {}; | 757 | return {}; |
| @@ -1218,6 +1224,8 @@ private: | |||
| 1218 | &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>, | 1224 | &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>, |
| 1219 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, | 1225 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, |
| 1220 | &SPIRVDecompiler::HNegate, | 1226 | &SPIRVDecompiler::HNegate, |
| 1227 | &SPIRVDecompiler::HClamp, | ||
| 1228 | &SPIRVDecompiler::HUnpack, | ||
| 1221 | &SPIRVDecompiler::HMergeF32, | 1229 | &SPIRVDecompiler::HMergeF32, |
| 1222 | &SPIRVDecompiler::HMergeH0, | 1230 | &SPIRVDecompiler::HMergeH0, |
| 1223 | &SPIRVDecompiler::HMergeH1, | 1231 | &SPIRVDecompiler::HMergeH1, |
| @@ -1260,6 +1268,13 @@ private: | |||
| 1260 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::HalfFloat>, | 1268 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::HalfFloat>, |
| 1261 | &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::HalfFloat>, | 1269 | &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::HalfFloat>, |
| 1262 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::HalfFloat>, | 1270 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::HalfFloat>, |
| 1271 | // TODO(Rodrigo): Should these use the OpFUnord* variants? | ||
| 1272 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::HalfFloat>, | ||
| 1273 | &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::HalfFloat>, | ||
| 1274 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::HalfFloat>, | ||
| 1275 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::HalfFloat>, | ||
| 1276 | &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::HalfFloat>, | ||
| 1277 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::HalfFloat>, | ||
| 1263 | 1278 | ||
| 1264 | &SPIRVDecompiler::Texture, | 1279 | &SPIRVDecompiler::Texture, |
| 1265 | &SPIRVDecompiler::TextureLod, | 1280 | &SPIRVDecompiler::TextureLod, |
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index baee89107..9467f9417 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp | |||
| @@ -18,7 +18,9 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { | |||
| 18 | 18 | ||
| 19 | if (opcode->get().GetId() == OpCode::Id::HADD2_C || | 19 | if (opcode->get().GetId() == OpCode::Id::HADD2_C || |
| 20 | opcode->get().GetId() == OpCode::Id::HADD2_R) { | 20 | opcode->get().GetId() == OpCode::Id::HADD2_R) { |
| 21 | UNIMPLEMENTED_IF(instr.alu_half.ftz != 0); | 21 | if (instr.alu_half.ftz != 0) { |
| 22 | LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); | ||
| 23 | } | ||
| 22 | } | 24 | } |
| 23 | UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented"); | 25 | UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented"); |
| 24 | 26 | ||
| @@ -27,9 +29,8 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { | |||
| 27 | const bool negate_b = | 29 | const bool negate_b = |
| 28 | opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0; | 30 | opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0; |
| 29 | 31 | ||
| 30 | const Node op_a = GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.alu_half.abs_a, negate_a); | 32 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); |
| 31 | 33 | op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a); | |
| 32 | // instr.alu_half.type_a | ||
| 33 | 34 | ||
| 34 | Node op_b = [&]() { | 35 | Node op_b = [&]() { |
| 35 | switch (opcode->get().GetId()) { | 36 | switch (opcode->get().GetId()) { |
| @@ -44,17 +45,17 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { | |||
| 44 | return Immediate(0); | 45 | return Immediate(0); |
| 45 | } | 46 | } |
| 46 | }(); | 47 | }(); |
| 48 | op_b = UnpackHalfFloat(op_b, instr.alu_half.type_b); | ||
| 47 | op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); | 49 | op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); |
| 48 | 50 | ||
| 49 | Node value = [&]() { | 51 | Node value = [&]() { |
| 50 | MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a, instr.alu_half.type_b}}; | ||
| 51 | switch (opcode->get().GetId()) { | 52 | switch (opcode->get().GetId()) { |
| 52 | case OpCode::Id::HADD2_C: | 53 | case OpCode::Id::HADD2_C: |
| 53 | case OpCode::Id::HADD2_R: | 54 | case OpCode::Id::HADD2_R: |
| 54 | return Operation(OperationCode::HAdd, meta, op_a, op_b); | 55 | return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); |
| 55 | case OpCode::Id::HMUL2_C: | 56 | case OpCode::Id::HMUL2_C: |
| 56 | case OpCode::Id::HMUL2_R: | 57 | case OpCode::Id::HMUL2_R: |
| 57 | return Operation(OperationCode::HMul, meta, op_a, op_b); | 58 | return Operation(OperationCode::HMul, PRECISE, op_a, op_b); |
| 58 | default: | 59 | default: |
| 59 | UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); | 60 | UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); |
| 60 | return Immediate(0); | 61 | return Immediate(0); |
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp index c2164ba50..fbcd35b18 100644 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp | |||
| @@ -17,34 +17,33 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { | |||
| 17 | const auto opcode = OpCode::Decode(instr); | 17 | const auto opcode = OpCode::Decode(instr); |
| 18 | 18 | ||
| 19 | if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { | 19 | if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { |
| 20 | UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0); | 20 | if (instr.alu_half_imm.ftz != 0) { |
| 21 | LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); | ||
| 22 | } | ||
| 21 | } else { | 23 | } else { |
| 22 | UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); | 24 | UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); |
| 23 | } | 25 | } |
| 24 | UNIMPLEMENTED_IF_MSG(instr.alu_half_imm.saturate != 0, | ||
| 25 | "Half float immediate saturation not implemented"); | ||
| 26 | 26 | ||
| 27 | Node op_a = GetRegister(instr.gpr8); | 27 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); |
| 28 | op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); | 28 | op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); |
| 29 | 29 | ||
| 30 | const Node op_b = UnpackHalfImmediate(instr, true); | 30 | const Node op_b = UnpackHalfImmediate(instr, true); |
| 31 | 31 | ||
| 32 | Node value = [&]() { | 32 | Node value = [&]() { |
| 33 | MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a}}; | ||
| 34 | switch (opcode->get().GetId()) { | 33 | switch (opcode->get().GetId()) { |
| 35 | case OpCode::Id::HADD2_IMM: | 34 | case OpCode::Id::HADD2_IMM: |
| 36 | return Operation(OperationCode::HAdd, meta, op_a, op_b); | 35 | return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); |
| 37 | case OpCode::Id::HMUL2_IMM: | 36 | case OpCode::Id::HMUL2_IMM: |
| 38 | return Operation(OperationCode::HMul, meta, op_a, op_b); | 37 | return Operation(OperationCode::HMul, PRECISE, op_a, op_b); |
| 39 | default: | 38 | default: |
| 40 | UNREACHABLE(); | 39 | UNREACHABLE(); |
| 41 | return Immediate(0); | 40 | return Immediate(0); |
| 42 | } | 41 | } |
| 43 | }(); | 42 | }(); |
| 44 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); | ||
| 45 | 43 | ||
| 44 | value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate); | ||
| 45 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); | ||
| 46 | SetRegister(bb, instr.gpr0, value); | 46 | SetRegister(bb, instr.gpr0, value); |
| 47 | |||
| 48 | return pc; | 47 | return pc; |
| 49 | } | 48 | } |
| 50 | 49 | ||
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 55a6fbbf2..ba15b1115 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp | |||
| @@ -18,13 +18,29 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 18 | const auto opcode = OpCode::Decode(instr); | 18 | const auto opcode = OpCode::Decode(instr); |
| 19 | 19 | ||
| 20 | switch (opcode->get().GetId()) { | 20 | switch (opcode->get().GetId()) { |
| 21 | case OpCode::Id::I2I_R: { | 21 | case OpCode::Id::I2I_R: |
| 22 | case OpCode::Id::I2I_C: | ||
| 23 | case OpCode::Id::I2I_IMM: { | ||
| 22 | UNIMPLEMENTED_IF(instr.conversion.selector); | 24 | UNIMPLEMENTED_IF(instr.conversion.selector); |
| 25 | UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); | ||
| 26 | UNIMPLEMENTED_IF(instr.alu.saturate_d); | ||
| 23 | 27 | ||
| 24 | const bool input_signed = instr.conversion.is_input_signed; | 28 | const bool input_signed = instr.conversion.is_input_signed; |
| 25 | const bool output_signed = instr.conversion.is_output_signed; | 29 | const bool output_signed = instr.conversion.is_output_signed; |
| 26 | 30 | ||
| 27 | Node value = GetRegister(instr.gpr20); | 31 | Node value = [&]() { |
| 32 | switch (opcode->get().GetId()) { | ||
| 33 | case OpCode::Id::I2I_R: | ||
| 34 | return GetRegister(instr.gpr20); | ||
| 35 | case OpCode::Id::I2I_C: | ||
| 36 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 37 | case OpCode::Id::I2I_IMM: | ||
| 38 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 39 | default: | ||
| 40 | UNREACHABLE(); | ||
| 41 | return Immediate(0); | ||
| 42 | } | ||
| 43 | }(); | ||
| 28 | value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); | 44 | value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); |
| 29 | 45 | ||
| 30 | value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, instr.conversion.negate_a, | 46 | value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, instr.conversion.negate_a, |
| @@ -38,17 +54,24 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 38 | break; | 54 | break; |
| 39 | } | 55 | } |
| 40 | case OpCode::Id::I2F_R: | 56 | case OpCode::Id::I2F_R: |
| 41 | case OpCode::Id::I2F_C: { | 57 | case OpCode::Id::I2F_C: |
| 42 | UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); | 58 | case OpCode::Id::I2F_IMM: { |
| 59 | UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); | ||
| 43 | UNIMPLEMENTED_IF(instr.conversion.selector); | 60 | UNIMPLEMENTED_IF(instr.conversion.selector); |
| 44 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 61 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, |
| 45 | "Condition codes generation in I2F is not implemented"); | 62 | "Condition codes generation in I2F is not implemented"); |
| 46 | 63 | ||
| 47 | Node value = [&]() { | 64 | Node value = [&]() { |
| 48 | if (instr.is_b_gpr) { | 65 | switch (opcode->get().GetId()) { |
| 66 | case OpCode::Id::I2F_R: | ||
| 49 | return GetRegister(instr.gpr20); | 67 | return GetRegister(instr.gpr20); |
| 50 | } else { | 68 | case OpCode::Id::I2F_C: |
| 51 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | 69 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |
| 70 | case OpCode::Id::I2F_IMM: | ||
| 71 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 72 | default: | ||
| 73 | UNREACHABLE(); | ||
| 74 | return Immediate(0); | ||
| 52 | } | 75 | } |
| 53 | }(); | 76 | }(); |
| 54 | const bool input_signed = instr.conversion.is_input_signed; | 77 | const bool input_signed = instr.conversion.is_input_signed; |
| @@ -62,24 +85,31 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 62 | break; | 85 | break; |
| 63 | } | 86 | } |
| 64 | case OpCode::Id::F2F_R: | 87 | case OpCode::Id::F2F_R: |
| 65 | case OpCode::Id::F2F_C: { | 88 | case OpCode::Id::F2F_C: |
| 66 | UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word); | 89 | case OpCode::Id::F2F_IMM: { |
| 67 | UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); | 90 | UNIMPLEMENTED_IF(instr.conversion.f2f.dst_size != Register::Size::Word); |
| 91 | UNIMPLEMENTED_IF(instr.conversion.f2f.src_size != Register::Size::Word); | ||
| 68 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 92 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, |
| 69 | "Condition codes generation in F2F is not implemented"); | 93 | "Condition codes generation in F2F is not implemented"); |
| 70 | 94 | ||
| 71 | Node value = [&]() { | 95 | Node value = [&]() { |
| 72 | if (instr.is_b_gpr) { | 96 | switch (opcode->get().GetId()) { |
| 97 | case OpCode::Id::F2F_R: | ||
| 73 | return GetRegister(instr.gpr20); | 98 | return GetRegister(instr.gpr20); |
| 74 | } else { | 99 | case OpCode::Id::F2F_C: |
| 75 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | 100 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |
| 101 | case OpCode::Id::F2F_IMM: | ||
| 102 | return GetImmediate19(instr); | ||
| 103 | default: | ||
| 104 | UNREACHABLE(); | ||
| 105 | return Immediate(0); | ||
| 76 | } | 106 | } |
| 77 | }(); | 107 | }(); |
| 78 | 108 | ||
| 79 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | 109 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); |
| 80 | 110 | ||
| 81 | value = [&]() { | 111 | value = [&]() { |
| 82 | switch (instr.conversion.f2f.rounding) { | 112 | switch (instr.conversion.f2f.GetRoundingMode()) { |
| 83 | case Tegra::Shader::F2fRoundingOp::None: | 113 | case Tegra::Shader::F2fRoundingOp::None: |
| 84 | return value; | 114 | return value; |
| 85 | case Tegra::Shader::F2fRoundingOp::Round: | 115 | case Tegra::Shader::F2fRoundingOp::Round: |
| @@ -102,15 +132,22 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 102 | break; | 132 | break; |
| 103 | } | 133 | } |
| 104 | case OpCode::Id::F2I_R: | 134 | case OpCode::Id::F2I_R: |
| 105 | case OpCode::Id::F2I_C: { | 135 | case OpCode::Id::F2I_C: |
| 136 | case OpCode::Id::F2I_IMM: { | ||
| 106 | UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); | 137 | UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); |
| 107 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 138 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, |
| 108 | "Condition codes generation in F2I is not implemented"); | 139 | "Condition codes generation in F2I is not implemented"); |
| 109 | Node value = [&]() { | 140 | Node value = [&]() { |
| 110 | if (instr.is_b_gpr) { | 141 | switch (opcode->get().GetId()) { |
| 142 | case OpCode::Id::F2I_R: | ||
| 111 | return GetRegister(instr.gpr20); | 143 | return GetRegister(instr.gpr20); |
| 112 | } else { | 144 | case OpCode::Id::F2I_C: |
| 113 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | 145 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |
| 146 | case OpCode::Id::F2I_IMM: | ||
| 147 | return GetImmediate19(instr); | ||
| 148 | default: | ||
| 149 | UNREACHABLE(); | ||
| 150 | return Immediate(0); | ||
| 114 | } | 151 | } |
| 115 | }(); | 152 | }(); |
| 116 | 153 | ||
| @@ -134,7 +171,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 134 | }(); | 171 | }(); |
| 135 | const bool is_signed = instr.conversion.is_output_signed; | 172 | const bool is_signed = instr.conversion.is_output_signed; |
| 136 | value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value); | 173 | value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value); |
| 137 | value = ConvertIntegerSize(value, instr.conversion.dest_size, is_signed); | 174 | value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed); |
| 138 | 175 | ||
| 139 | SetRegister(bb, instr.gpr0, value); | 176 | SetRegister(bb, instr.gpr0, value); |
| 140 | break; | 177 | break; |
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index 748368555..1dd94bf9d 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp | |||
| @@ -18,11 +18,13 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { | |||
| 18 | const Instruction instr = {program_code[pc]}; | 18 | const Instruction instr = {program_code[pc]}; |
| 19 | const auto opcode = OpCode::Decode(instr); | 19 | const auto opcode = OpCode::Decode(instr); |
| 20 | 20 | ||
| 21 | UNIMPLEMENTED_IF(instr.hset2.ftz != 0); | 21 | if (instr.hset2.ftz != 0) { |
| 22 | LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); | ||
| 23 | } | ||
| 24 | |||
| 25 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); | ||
| 26 | op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a); | ||
| 22 | 27 | ||
| 23 | // instr.hset2.type_a | ||
| 24 | // instr.hset2.type_b | ||
| 25 | Node op_a = GetRegister(instr.gpr8); | ||
| 26 | Node op_b = [&]() { | 28 | Node op_b = [&]() { |
| 27 | switch (opcode->get().GetId()) { | 29 | switch (opcode->get().GetId()) { |
| 28 | case OpCode::Id::HSET2_R: | 30 | case OpCode::Id::HSET2_R: |
| @@ -32,14 +34,12 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { | |||
| 32 | return Immediate(0); | 34 | return Immediate(0); |
| 33 | } | 35 | } |
| 34 | }(); | 36 | }(); |
| 35 | 37 | op_b = UnpackHalfFloat(op_b, instr.hset2.type_b); | |
| 36 | op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a); | ||
| 37 | op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b); | 38 | op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b); |
| 38 | 39 | ||
| 39 | const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); | 40 | const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); |
| 40 | 41 | ||
| 41 | MetaHalfArithmetic meta{false, {instr.hset2.type_a, instr.hset2.type_b}}; | 42 | const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b); |
| 42 | const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, meta, op_a, op_b); | ||
| 43 | 43 | ||
| 44 | const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); | 44 | const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); |
| 45 | 45 | ||
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp index e68512692..6e59eb650 100644 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ b/src/video_core/shader/decode/half_set_predicate.cpp | |||
| @@ -19,10 +19,10 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { | |||
| 19 | 19 | ||
| 20 | UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); | 20 | UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); |
| 21 | 21 | ||
| 22 | Node op_a = GetRegister(instr.gpr8); | 22 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); |
| 23 | op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); | 23 | op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); |
| 24 | 24 | ||
| 25 | const Node op_b = [&]() { | 25 | Node op_b = [&]() { |
| 26 | switch (opcode->get().GetId()) { | 26 | switch (opcode->get().GetId()) { |
| 27 | case OpCode::Id::HSETP2_R: | 27 | case OpCode::Id::HSETP2_R: |
| 28 | return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, | 28 | return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, |
| @@ -32,6 +32,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { | |||
| 32 | return Immediate(0); | 32 | return Immediate(0); |
| 33 | } | 33 | } |
| 34 | }(); | 34 | }(); |
| 35 | op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b); | ||
| 35 | 36 | ||
| 36 | // We can't use the constant predicate as destination. | 37 | // We can't use the constant predicate as destination. |
| 37 | ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); | 38 | ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); |
| @@ -42,8 +43,7 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { | |||
| 42 | const OperationCode pair_combiner = | 43 | const OperationCode pair_combiner = |
| 43 | instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2; | 44 | instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2; |
| 44 | 45 | ||
| 45 | MetaHalfArithmetic meta = {false, {instr.hsetp2.type_a, instr.hsetp2.type_b}}; | 46 | const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b); |
| 46 | const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b); | ||
| 47 | const Node first_pred = Operation(pair_combiner, comparison); | 47 | const Node first_pred = Operation(pair_combiner, comparison); |
| 48 | 48 | ||
| 49 | // Set the primary predicate to the result of Predicate OP SecondPredicate | 49 | // Set the primary predicate to the result of Predicate OP SecondPredicate |
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index 7a07c5ec6..5c1becce5 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp | |||
| @@ -27,10 +27,6 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { | |||
| 27 | } | 27 | } |
| 28 | 28 | ||
| 29 | constexpr auto identity = HalfType::H0_H1; | 29 | constexpr auto identity = HalfType::H0_H1; |
| 30 | |||
| 31 | const HalfType type_a = instr.hfma2.type_a; | ||
| 32 | const Node op_a = GetRegister(instr.gpr8); | ||
| 33 | |||
| 34 | bool neg_b{}, neg_c{}; | 30 | bool neg_b{}, neg_c{}; |
| 35 | auto [saturate, type_b, op_b, type_c, | 31 | auto [saturate, type_b, op_b, type_c, |
| 36 | op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> { | 32 | op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> { |
| @@ -62,11 +58,11 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { | |||
| 62 | }(); | 58 | }(); |
| 63 | UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented"); | 59 | UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented"); |
| 64 | 60 | ||
| 65 | op_b = GetOperandAbsNegHalf(op_b, false, neg_b); | 61 | const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a); |
| 66 | op_c = GetOperandAbsNegHalf(op_c, false, neg_c); | 62 | op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); |
| 63 | op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); | ||
| 67 | 64 | ||
| 68 | MetaHalfArithmetic meta{true, {type_a, type_b, type_c}}; | 65 | Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); |
| 69 | Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c); | ||
| 70 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); | 66 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); |
| 71 | 67 | ||
| 72 | SetRegister(bb, instr.gpr0, value); | 68 | SetRegister(bb, instr.gpr0, value); |
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index ac5112d78..17f2f711c 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -189,7 +189,11 @@ Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { | |||
| 189 | const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); | 189 | const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); |
| 190 | const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); | 190 | const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); |
| 191 | 191 | ||
| 192 | return Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, first_negate, second_negate); | 192 | return Operation(OperationCode::HNegate, NO_PRECISE, value, first_negate, second_negate); |
| 193 | } | ||
| 194 | |||
| 195 | Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { | ||
| 196 | return Operation(OperationCode::HUnpack, type, value); | ||
| 193 | } | 197 | } |
| 194 | 198 | ||
| 195 | Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { | 199 | Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { |
| @@ -209,17 +213,26 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { | |||
| 209 | 213 | ||
| 210 | Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { | 214 | Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { |
| 211 | if (absolute) { | 215 | if (absolute) { |
| 212 | value = Operation(OperationCode::HAbsolute, HALF_NO_PRECISE, value); | 216 | value = Operation(OperationCode::HAbsolute, NO_PRECISE, value); |
| 213 | } | 217 | } |
| 214 | if (negate) { | 218 | if (negate) { |
| 215 | value = Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, GetPredicate(true), | 219 | value = Operation(OperationCode::HNegate, NO_PRECISE, value, GetPredicate(true), |
| 216 | GetPredicate(true)); | 220 | GetPredicate(true)); |
| 217 | } | 221 | } |
| 218 | return value; | 222 | return value; |
| 219 | } | 223 | } |
| 220 | 224 | ||
| 225 | Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { | ||
| 226 | if (!saturate) { | ||
| 227 | return value; | ||
| 228 | } | ||
| 229 | const Node positive_zero = Immediate(std::copysignf(0, 1)); | ||
| 230 | const Node positive_one = Immediate(1.0f); | ||
| 231 | return Operation(OperationCode::HClamp, NO_PRECISE, value, positive_zero, positive_one); | ||
| 232 | } | ||
| 233 | |||
| 221 | Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { | 234 | Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { |
| 222 | static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { | 235 | const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { |
| 223 | {PredCondition::LessThan, OperationCode::LogicalFLessThan}, | 236 | {PredCondition::LessThan, OperationCode::LogicalFLessThan}, |
| 224 | {PredCondition::Equal, OperationCode::LogicalFEqual}, | 237 | {PredCondition::Equal, OperationCode::LogicalFEqual}, |
| 225 | {PredCondition::LessEqual, OperationCode::LogicalFLessEqual}, | 238 | {PredCondition::LessEqual, OperationCode::LogicalFLessEqual}, |
| @@ -255,7 +268,7 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N | |||
| 255 | 268 | ||
| 256 | Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, | 269 | Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, |
| 257 | Node op_b) { | 270 | Node op_b) { |
| 258 | static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { | 271 | const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { |
| 259 | {PredCondition::LessThan, OperationCode::LogicalILessThan}, | 272 | {PredCondition::LessThan, OperationCode::LogicalILessThan}, |
| 260 | {PredCondition::Equal, OperationCode::LogicalIEqual}, | 273 | {PredCondition::Equal, OperationCode::LogicalIEqual}, |
| 261 | {PredCondition::LessEqual, OperationCode::LogicalILessEqual}, | 274 | {PredCondition::LessEqual, OperationCode::LogicalILessEqual}, |
| @@ -283,40 +296,32 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si | |||
| 283 | return predicate; | 296 | return predicate; |
| 284 | } | 297 | } |
| 285 | 298 | ||
| 286 | Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, | 299 | Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, |
| 287 | const MetaHalfArithmetic& meta, Node op_a, Node op_b) { | 300 | Node op_b) { |
| 288 | 301 | const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { | |
| 289 | UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || | ||
| 290 | condition == PredCondition::NotEqualWithNan || | ||
| 291 | condition == PredCondition::LessEqualWithNan || | ||
| 292 | condition == PredCondition::GreaterThanWithNan || | ||
| 293 | condition == PredCondition::GreaterEqualWithNan, | ||
| 294 | "Unimplemented NaN comparison for half floats"); | ||
| 295 | |||
| 296 | static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { | ||
| 297 | {PredCondition::LessThan, OperationCode::Logical2HLessThan}, | 302 | {PredCondition::LessThan, OperationCode::Logical2HLessThan}, |
| 298 | {PredCondition::Equal, OperationCode::Logical2HEqual}, | 303 | {PredCondition::Equal, OperationCode::Logical2HEqual}, |
| 299 | {PredCondition::LessEqual, OperationCode::Logical2HLessEqual}, | 304 | {PredCondition::LessEqual, OperationCode::Logical2HLessEqual}, |
| 300 | {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan}, | 305 | {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan}, |
| 301 | {PredCondition::NotEqual, OperationCode::Logical2HNotEqual}, | 306 | {PredCondition::NotEqual, OperationCode::Logical2HNotEqual}, |
| 302 | {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual}, | 307 | {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual}, |
| 303 | {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThan}, | 308 | {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan}, |
| 304 | {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqual}, | 309 | {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan}, |
| 305 | {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqual}, | 310 | {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan}, |
| 306 | {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThan}, | 311 | {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan}, |
| 307 | {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqual}}; | 312 | {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}}; |
| 308 | 313 | ||
| 309 | const auto comparison{PredicateComparisonTable.find(condition)}; | 314 | const auto comparison{PredicateComparisonTable.find(condition)}; |
| 310 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), | 315 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), |
| 311 | "Unknown predicate comparison operation"); | 316 | "Unknown predicate comparison operation"); |
| 312 | 317 | ||
| 313 | const Node predicate = Operation(comparison->second, meta, op_a, op_b); | 318 | const Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); |
| 314 | 319 | ||
| 315 | return predicate; | 320 | return predicate; |
| 316 | } | 321 | } |
| 317 | 322 | ||
| 318 | OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { | 323 | OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { |
| 319 | static const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = { | 324 | const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = { |
| 320 | {PredOperation::And, OperationCode::LogicalAnd}, | 325 | {PredOperation::And, OperationCode::LogicalAnd}, |
| 321 | {PredOperation::Or, OperationCode::LogicalOr}, | 326 | {PredOperation::Or, OperationCode::LogicalOr}, |
| 322 | {PredOperation::Xor, OperationCode::LogicalXor}, | 327 | {PredOperation::Xor, OperationCode::LogicalXor}, |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 57af8b10f..81278fb33 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -109,11 +109,13 @@ enum class OperationCode { | |||
| 109 | UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint | 109 | UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint |
| 110 | UBitCount, /// (MetaArithmetic, uint) -> uint | 110 | UBitCount, /// (MetaArithmetic, uint) -> uint |
| 111 | 111 | ||
| 112 | HAdd, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 | 112 | HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 |
| 113 | HMul, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 | 113 | HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 |
| 114 | HFma, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 | 114 | HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 |
| 115 | HAbsolute, /// (f16vec2 a) -> f16vec2 | 115 | HAbsolute, /// (f16vec2 a) -> f16vec2 |
| 116 | HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 | 116 | HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 |
| 117 | HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 | ||
| 118 | HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 | ||
| 117 | HMergeF32, /// (f16vec2 src) -> float | 119 | HMergeF32, /// (f16vec2 src) -> float |
| 118 | HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 | 120 | HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 |
| 119 | HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 | 121 | HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 |
| @@ -150,12 +152,18 @@ enum class OperationCode { | |||
| 150 | LogicalUNotEqual, /// (uint a, uint b) -> bool | 152 | LogicalUNotEqual, /// (uint a, uint b) -> bool |
| 151 | LogicalUGreaterEqual, /// (uint a, uint b) -> bool | 153 | LogicalUGreaterEqual, /// (uint a, uint b) -> bool |
| 152 | 154 | ||
| 153 | Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | 155 | Logical2HLessThan, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> bool2 |
| 154 | Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | 156 | Logical2HEqual, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> bool2 |
| 155 | Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | 157 | Logical2HLessEqual, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> bool2 |
| 156 | Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | 158 | Logical2HGreaterThan, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> bool2 |
| 157 | Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | 159 | Logical2HNotEqual, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> bool2 |
| 158 | Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2 | 160 | Logical2HGreaterEqual, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> bool2 |
| 161 | Logical2HLessThanWithNan, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> bool2 | ||
| 162 | Logical2HEqualWithNan, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> bool2 | ||
| 163 | Logical2HLessEqualWithNan, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> bool2 | ||
| 164 | Logical2HGreaterThanWithNan, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> bool2 | ||
| 165 | Logical2HNotEqualWithNan, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> bool2 | ||
| 166 | Logical2HGreaterEqualWithNan, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> bool2 | ||
| 159 | 167 | ||
| 160 | Texture, /// (MetaTexture, float[N] coords) -> float4 | 168 | Texture, /// (MetaTexture, float[N] coords) -> float4 |
| 161 | TextureLod, /// (MetaTexture, float[N] coords) -> float4 | 169 | TextureLod, /// (MetaTexture, float[N] coords) -> float4 |
| @@ -308,13 +316,6 @@ struct MetaArithmetic { | |||
| 308 | bool precise{}; | 316 | bool precise{}; |
| 309 | }; | 317 | }; |
| 310 | 318 | ||
| 311 | struct MetaHalfArithmetic { | ||
| 312 | bool precise{}; | ||
| 313 | std::array<Tegra::Shader::HalfType, 3> types = {Tegra::Shader::HalfType::H0_H1, | ||
| 314 | Tegra::Shader::HalfType::H0_H1, | ||
| 315 | Tegra::Shader::HalfType::H0_H1}; | ||
| 316 | }; | ||
| 317 | |||
| 318 | struct MetaTexture { | 319 | struct MetaTexture { |
| 319 | const Sampler& sampler; | 320 | const Sampler& sampler; |
| 320 | Node array{}; | 321 | Node array{}; |
| @@ -326,11 +327,10 @@ struct MetaTexture { | |||
| 326 | u32 element{}; | 327 | u32 element{}; |
| 327 | }; | 328 | }; |
| 328 | 329 | ||
| 329 | constexpr MetaArithmetic PRECISE = {true}; | 330 | inline constexpr MetaArithmetic PRECISE = {true}; |
| 330 | constexpr MetaArithmetic NO_PRECISE = {false}; | 331 | inline constexpr MetaArithmetic NO_PRECISE = {false}; |
| 331 | constexpr MetaHalfArithmetic HALF_NO_PRECISE = {false}; | ||
| 332 | 332 | ||
| 333 | using Meta = std::variant<MetaArithmetic, MetaHalfArithmetic, MetaTexture>; | 333 | using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>; |
| 334 | 334 | ||
| 335 | /// Holds any kind of operation that can be done in the IR | 335 | /// Holds any kind of operation that can be done in the IR |
| 336 | class OperationNode final { | 336 | class OperationNode final { |
| @@ -734,10 +734,14 @@ private: | |||
| 734 | 734 | ||
| 735 | /// Unpacks a half immediate from an instruction | 735 | /// Unpacks a half immediate from an instruction |
| 736 | Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation); | 736 | Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation); |
| 737 | /// Unpacks a binary value into a half float pair with a type format | ||
| 738 | Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type); | ||
| 737 | /// Merges a half pair into another value | 739 | /// Merges a half pair into another value |
| 738 | Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); | 740 | Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); |
| 739 | /// Conditionally absolute/negated half float pair. Absolute is applied first | 741 | /// Conditionally absolute/negated half float pair. Absolute is applied first |
| 740 | Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate); | 742 | Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate); |
| 743 | /// Conditionally saturates a half float pair | ||
| 744 | Node GetSaturatedHalfFloat(Node value, bool saturate = true); | ||
| 741 | 745 | ||
| 742 | /// Returns a predicate comparing two floats | 746 | /// Returns a predicate comparing two floats |
| 743 | Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); | 747 | Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); |
| @@ -745,8 +749,7 @@ private: | |||
| 745 | Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed, | 749 | Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed, |
| 746 | Node op_a, Node op_b); | 750 | Node op_a, Node op_b); |
| 747 | /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared | 751 | /// Returns a predicate comparing two half float pairs |
| 748 | Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, | 752 | Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); |
| 749 | const MetaHalfArithmetic& meta, Node op_a, Node op_b); | ||
| 750 | 753 | ||
| 751 | /// Returns a predicate combiner operation | 754 | /// Returns a predicate combiner operation |
| 752 | OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); | 755 | OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); |
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 995d0e068..217805386 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -288,6 +288,29 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||
| 288 | } | 288 | } |
| 289 | } | 289 | } |
| 290 | 290 | ||
| 291 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, | ||
| 292 | const u32 block_height, const std::size_t copy_size, const u8* source_data, | ||
| 293 | u8* swizzle_data) { | ||
| 294 | const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; | ||
| 295 | std::size_t count = 0; | ||
| 296 | for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { | ||
| 297 | const std::size_t gob_address_y = | ||
| 298 | (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + | ||
| 299 | ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size; | ||
| 300 | const auto& table = legacy_swizzle_table[y % gob_size_y]; | ||
| 301 | for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { | ||
| 302 | const std::size_t gob_address = | ||
| 303 | gob_address_y + (x / gob_size_x) * gob_size * block_height; | ||
| 304 | const std::size_t swizzled_offset = gob_address + table[x % gob_size_x]; | ||
| 305 | const u8* source_line = source_data + count; | ||
| 306 | u8* dest_addr = swizzle_data + swizzled_offset; | ||
| 307 | count++; | ||
| 308 | |||
| 309 | std::memcpy(dest_addr, source_line, 1); | ||
| 310 | } | ||
| 311 | } | ||
| 312 | } | ||
| 313 | |||
| 291 | std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, | 314 | std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, |
| 292 | u32 height) { | 315 | u32 height) { |
| 293 | std::vector<u8> rgba_data; | 316 | std::vector<u8> rgba_data; |
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index e078fa274..e072d8401 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -51,4 +51,8 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||
| 51 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, | 51 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, |
| 52 | u32 offset_x, u32 offset_y); | 52 | u32 offset_x, u32 offset_y); |
| 53 | 53 | ||
| 54 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, | ||
| 55 | const u32 block_height, const std::size_t copy_size, const u8* source_data, | ||
| 56 | u8* swizzle_data); | ||
| 57 | |||
| 54 | } // namespace Tegra::Texture | 58 | } // namespace Tegra::Texture |
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index cb82ecf3f..60cda0ca3 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp | |||
| @@ -5,6 +5,8 @@ | |||
| 5 | #include <memory> | 5 | #include <memory> |
| 6 | #include "core/core.h" | 6 | #include "core/core.h" |
| 7 | #include "core/settings.h" | 7 | #include "core/settings.h" |
| 8 | #include "video_core/gpu_asynch.h" | ||
| 9 | #include "video_core/gpu_synch.h" | ||
| 8 | #include "video_core/renderer_base.h" | 10 | #include "video_core/renderer_base.h" |
| 9 | #include "video_core/renderer_opengl/renderer_opengl.h" | 11 | #include "video_core/renderer_opengl/renderer_opengl.h" |
| 10 | #include "video_core/video_core.h" | 12 | #include "video_core/video_core.h" |
| @@ -16,6 +18,14 @@ std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_wind | |||
| 16 | return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system); | 18 | return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system); |
| 17 | } | 19 | } |
| 18 | 20 | ||
| 21 | std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system) { | ||
| 22 | if (Settings::values.use_asynchronous_gpu_emulation) { | ||
| 23 | return std::make_unique<VideoCommon::GPUAsynch>(system, system.Renderer()); | ||
| 24 | } | ||
| 25 | |||
| 26 | return std::make_unique<VideoCommon::GPUSynch>(system, system.Renderer()); | ||
| 27 | } | ||
| 28 | |||
| 19 | u16 GetResolutionScaleFactor(const RendererBase& renderer) { | 29 | u16 GetResolutionScaleFactor(const RendererBase& renderer) { |
| 20 | return static_cast<u16>( | 30 | return static_cast<u16>( |
| 21 | Settings::values.resolution_factor | 31 | Settings::values.resolution_factor |
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index 3c583f195..b8e0ac372 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h | |||
| @@ -14,6 +14,10 @@ namespace Core::Frontend { | |||
| 14 | class EmuWindow; | 14 | class EmuWindow; |
| 15 | } | 15 | } |
| 16 | 16 | ||
| 17 | namespace Tegra { | ||
| 18 | class GPU; | ||
| 19 | } | ||
| 20 | |||
| 17 | namespace VideoCore { | 21 | namespace VideoCore { |
| 18 | 22 | ||
| 19 | class RendererBase; | 23 | class RendererBase; |
| @@ -27,6 +31,9 @@ class RendererBase; | |||
| 27 | std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window, | 31 | std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window, |
| 28 | Core::System& system); | 32 | Core::System& system); |
| 29 | 33 | ||
| 34 | /// Creates an emulated GPU instance using the given system context. | ||
| 35 | std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system); | ||
| 36 | |||
| 30 | u16 GetResolutionScaleFactor(const RendererBase& renderer); | 37 | u16 GetResolutionScaleFactor(const RendererBase& renderer); |
| 31 | 38 | ||
| 32 | } // namespace VideoCore | 39 | } // namespace VideoCore |
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index c29f2d2dc..7eed9fcf3 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp | |||
| @@ -91,8 +91,8 @@ void EmuThread::run() { | |||
| 91 | 91 | ||
| 92 | class GGLContext : public Core::Frontend::GraphicsContext { | 92 | class GGLContext : public Core::Frontend::GraphicsContext { |
| 93 | public: | 93 | public: |
| 94 | explicit GGLContext(QOpenGLContext* shared_context) : surface() { | 94 | explicit GGLContext(QOpenGLContext* shared_context) |
| 95 | context = std::make_unique<QOpenGLContext>(shared_context); | 95 | : context{std::make_unique<QOpenGLContext>(shared_context)} { |
| 96 | surface.setFormat(shared_context->format()); | 96 | surface.setFormat(shared_context->format()); |
| 97 | surface.create(); | 97 | surface.create(); |
| 98 | } | 98 | } |
| @@ -186,8 +186,7 @@ private: | |||
| 186 | }; | 186 | }; |
| 187 | 187 | ||
| 188 | GRenderWindow::GRenderWindow(QWidget* parent, EmuThread* emu_thread) | 188 | GRenderWindow::GRenderWindow(QWidget* parent, EmuThread* emu_thread) |
| 189 | : QWidget(parent), child(nullptr), context(nullptr), emu_thread(emu_thread) { | 189 | : QWidget(parent), emu_thread(emu_thread) { |
| 190 | |||
| 191 | setWindowTitle(QStringLiteral("yuzu %1 | %2-%3") | 190 | setWindowTitle(QStringLiteral("yuzu %1 | %2-%3") |
| 192 | .arg(Common::g_build_name, Common::g_scm_branch, Common::g_scm_desc)); | 191 | .arg(Common::g_build_name, Common::g_scm_branch, Common::g_scm_desc)); |
| 193 | setAttribute(Qt::WA_AcceptTouchEvents); | 192 | setAttribute(Qt::WA_AcceptTouchEvents); |
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index 9608b959f..3df33aca1 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h | |||
| @@ -10,7 +10,6 @@ | |||
| 10 | #include <QImage> | 10 | #include <QImage> |
| 11 | #include <QThread> | 11 | #include <QThread> |
| 12 | #include <QWidget> | 12 | #include <QWidget> |
| 13 | #include "common/thread.h" | ||
| 14 | #include "core/core.h" | 13 | #include "core/core.h" |
| 15 | #include "core/frontend/emu_window.h" | 14 | #include "core/frontend/emu_window.h" |
| 16 | 15 | ||