Diffstat (limited to 'src')
21 files changed, 890 insertions, 691 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 789000294..4ee8c5733 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -48,8 +48,9 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32
               addr, offset, width, height, stride, format);
 
     const auto pixel_format = static_cast<Tegra::FramebufferConfig::PixelFormat>(format);
-    const Tegra::FramebufferConfig framebuffer{addr, offset, width, height,
-                                               stride, pixel_format, transform, crop_rect};
+    const auto transform_flags = static_cast<Tegra::FramebufferConfig::TransformFlags>(transform);
+    const Tegra::FramebufferConfig framebuffer{addr, offset, width, height,
+                                               stride, pixel_format, transform_flags, crop_rect};
 
     system.GetPerfStats().EndSystemFrame();
    system.GPU().SwapBuffers(&framebuffer);
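Note: FramebufferConfig::TransformFlags is now a scoped enum defined by video_core (see the framebuffer_config.h hunk below), so the raw u32 carried by the flip ioctl no longer converts implicitly and must be cast before the framebuffer is constructed. A minimal sketch of the pattern, with the raw value hypothetical:

    // Scoped enums do not implicitly convert from integers, so the raw
    // ioctl value has to be converted explicitly before it can be stored.
    const u32 raw_transform = 0x02; // hypothetical value from the ioctl buffer
    const auto transform_flags =
        static_cast<Tegra::FramebufferConfig::TransformFlags>(raw_transform);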
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index 775e76330..8b4867ca7 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -111,7 +111,6 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output) {
         event.event->GetWritableEvent().Signal();
         return NvResult::Success;
     }
-    auto lock = gpu.LockSync();
     const u32 current_syncpoint_value = event.fence.value;
     const s32 diff = current_syncpoint_value - params.threshold;
     if (diff >= 0) {
@@ -132,23 +131,24 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output) {
     }
 
     EventState status = events_interface.status[event_id];
-    if (event_id < MaxNvEvents || status == EventState::Free || status == EventState::Registered) {
-        events_interface.SetEventStatus(event_id, EventState::Waiting);
-        events_interface.assigned_syncpt[event_id] = params.syncpt_id;
-        events_interface.assigned_value[event_id] = target_value;
-        if (is_async) {
-            params.value = params.syncpt_id << 4;
-        } else {
-            params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000;
-        }
-        params.value |= event_id;
-        event.event->GetWritableEvent().Clear();
-        gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
+    const bool bad_parameter = status != EventState::Free && status != EventState::Registered;
+    if (bad_parameter) {
         std::memcpy(output.data(), &params, sizeof(params));
-        return NvResult::Timeout;
+        return NvResult::BadParameter;
     }
+    events_interface.SetEventStatus(event_id, EventState::Waiting);
+    events_interface.assigned_syncpt[event_id] = params.syncpt_id;
+    events_interface.assigned_value[event_id] = target_value;
+    if (is_async) {
+        params.value = params.syncpt_id << 4;
+    } else {
+        params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000;
+    }
+    params.value |= event_id;
+    event.event->GetWritableEvent().Clear();
+    gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
     std::memcpy(output.data(), &params, sizeof(params));
-    return NvResult::BadParameter;
+    return NvResult::Timeout;
 }
 
 NvResult nvhost_ctrl::IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output) {
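Note: the second hunk rewrites the tail of IocCtrlEventWait as a guard clause: the invalid-state case (event neither Free nor Registered) now exits early with NvResult::BadParameter, and the registration path runs at a single indentation level and still ends in NvResult::Timeout. The dropped `event_id < MaxNvEvents ||` term also appears to have been always true for any id that reached this point, which would have made the BadParameter branch unreachable before; the new `bad_parameter` test lets the event state actually decide the outcome. The shape, as a minimal sketch:

    // Guard clause: reject the failure case first, keep the main path flat.
    if (status != EventState::Free && status != EventState::Registered) {
        return NvResult::BadParameter;
    }
    // ... register the event and arm the syncpoint interrupt ...
    return NvResult::Timeout;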
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index c0a380088..54ac105d5 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -13,6 +13,14 @@
 #include "video_core/memory_manager.h"
 
 namespace Service::Nvidia::Devices {
+namespace {
+Tegra::CommandHeader BuildFenceAction(Tegra::GPU::FenceOperation op, u32 syncpoint_id) {
+    Tegra::GPU::FenceAction result{};
+    result.op.Assign(op);
+    result.syncpoint_id.Assign(syncpoint_id);
+    return {result.raw};
+}
+} // namespace
 
 nvhost_gpu::nvhost_gpu(Core::System& system_, std::shared_ptr<nvmap> nvmap_dev_,
                        SyncpointManager& syncpoint_manager_)
@@ -187,7 +195,7 @@ static std::vector<Tegra::CommandHeader> BuildWaitCommandList(Fence fence) {
         {fence.value},
         Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
                                   Tegra::SubmissionMode::Increasing),
-        Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Acquire, fence.id),
+        BuildFenceAction(Tegra::GPU::FenceOperation::Acquire, fence.id),
     };
 }
 
@@ -200,8 +208,7 @@ static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(Fence fence,
     for (u32 count = 0; count < add_increment; ++count) {
         result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
                                                       Tegra::SubmissionMode::Increasing));
-        result.emplace_back(
-            Tegra::GPU::FenceAction::Build(Tegra::GPU::FenceOperation::Increment, fence.id));
+        result.emplace_back(BuildFenceAction(Tegra::GPU::FenceOperation::Increment, fence.id));
     }
 
     return result;
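Note: the file-local BuildFenceAction replaces the old static Tegra::GPU::FenceAction::Build helper, so this device no longer depends on a helper living inside the GPU class (whose internals move behind a pimpl in gpu.cpp below). A minimal sketch of the BitField packing pattern it relies on, with the syncpoint id hypothetical:

    // FenceAction packs an operation and a syncpoint id into one 32-bit
    // command word; Assign() writes the field's bits into the shared raw value.
    Tegra::GPU::FenceAction action{};
    action.op.Assign(Tegra::GPU::FenceOperation::Increment);
    action.syncpoint_id.Assign(5); // hypothetical syncpoint id
    const Tegra::CommandHeader word{action.raw};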
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 3ead813b0..a22811ec1 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -13,28 +13,20 @@
 #include "common/thread.h"
 #include "core/core.h"
 #include "core/core_timing.h"
-#include "core/core_timing_util.h"
-#include "core/hardware_properties.h"
 #include "core/hle/kernel/k_readable_event.h"
-#include "core/hle/kernel/kernel.h"
 #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
 #include "core/hle/service/nvflinger/buffer_queue.h"
 #include "core/hle/service/nvflinger/nvflinger.h"
 #include "core/hle/service/vi/display/vi_display.h"
 #include "core/hle/service/vi/layer/vi_layer.h"
-#include "core/perf_stats.h"
-#include "video_core/renderer_base.h"
+#include "video_core/gpu.h"
 
 namespace Service::NVFlinger {
 
 constexpr auto frame_ns = std::chrono::nanoseconds{1000000000 / 60};
 
-void NVFlinger::VSyncThread(NVFlinger& nv_flinger) {
-    nv_flinger.SplitVSync();
-}
-
-void NVFlinger::SplitVSync() {
+void NVFlinger::SplitVSync(std::stop_token stop_token) {
     system.RegisterHostThread();
     std::string name = "yuzu:VSyncThread";
     MicroProfileOnThreadCreate(name.c_str());
@@ -45,7 +37,7 @@ void NVFlinger::SplitVSync() {
     Common::SetCurrentThreadName(name.c_str());
     Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
     s64 delay = 0;
-    while (is_running) {
+    while (!stop_token.stop_requested()) {
        guard->lock();
        const s64 time_start = system.CoreTiming().GetGlobalTimeNs().count();
        Compose();
@@ -55,7 +47,7 @@ void NVFlinger::SplitVSync() {
         const s64 next_time = std::max<s64>(0, ticks - time_passed - delay);
         guard->unlock();
         if (next_time > 0) {
-            wait_event->WaitFor(std::chrono::nanoseconds{next_time});
+            std::this_thread::sleep_for(std::chrono::nanoseconds{next_time});
         }
         delay = (system.CoreTiming().GetGlobalTimeNs().count() - time_end) - next_time;
     }
@@ -84,9 +76,7 @@ NVFlinger::NVFlinger(Core::System& system_)
     });
 
     if (system.IsMulticore()) {
-        is_running = true;
-        wait_event = std::make_unique<Common::Event>();
-        vsync_thread = std::make_unique<std::thread>(VSyncThread, std::ref(*this));
+        vsync_thread = std::jthread([this](std::stop_token token) { SplitVSync(token); });
     } else {
         system.CoreTiming().ScheduleEvent(frame_ns, composition_event);
     }
@@ -96,14 +86,7 @@ NVFlinger::~NVFlinger() {
     for (auto& buffer_queue : buffer_queues) {
         buffer_queue->Disconnect();
     }
-
-    if (system.IsMulticore()) {
-        is_running = false;
-        wait_event->Set();
-        vsync_thread->join();
-        vsync_thread.reset();
-        wait_event.reset();
-    } else {
+    if (!system.IsMulticore()) {
         system.CoreTiming().UnscheduleEvent(composition_event, 0);
     }
 }
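Note: the vsync thread is now a C++20 std::jthread. The thread body receives a std::stop_token, the loop polls stop_requested() instead of an atomic is_running flag, and the jthread destructor requests stop and joins automatically, which is what lets the destructor hunk above shrink to the single-core case. A self-contained sketch of the same lifecycle:

    #include <chrono>
    #include <thread>

    int main() {
        // The callable receives a stop_token; request_stop() (also called
        // implicitly by ~jthread) flips stop_requested() to true.
        std::jthread worker([](std::stop_token token) {
            while (!token.stop_requested()) {
                std::this_thread::sleep_for(std::chrono::milliseconds{16});
            }
        });
    } // ~jthread: request_stop() then join(); no manual flag or event needed

One trade-off worth noting: std::this_thread::sleep_for is not interruptible by the stop request (the removed wait_event->WaitFor could be woken early), so shutdown can now wait out the remainder of one frame interval.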
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index 6d84cafb4..7935cf773 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -4,13 +4,10 @@
 
 #pragma once
 
-#include <atomic>
 #include <list>
 #include <memory>
 #include <mutex>
 #include <optional>
-#include <string>
-#include <string_view>
 #include <thread>
 #include <vector>
 
@@ -109,9 +106,7 @@ private:
     /// Creates a layer with the specified layer ID in the desired display.
     void CreateLayerAtId(VI::Display& display, u64 layer_id);
 
-    static void VSyncThread(NVFlinger& nv_flinger);
-
-    void SplitVSync();
+    void SplitVSync(std::stop_token stop_token);
 
     std::shared_ptr<Nvidia::Module> nvdrv;
 
@@ -133,9 +128,7 @@
 
     Core::System& system;
 
-    std::unique_ptr<std::thread> vsync_thread;
-    std::unique_ptr<Common::Event> wait_event;
-    std::atomic<bool> is_running{};
+    std::jthread vsync_thread;
 
     KernelHelpers::ServiceContext service_context;
 };
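Note: vsync_thread is now a plain std::jthread member rather than a unique_ptr. Since members are destroyed in reverse declaration order and ~jthread joins the thread, the member should be declared after any state the thread body touches, as it is here, so the join happens before that state is torn down. An illustrative shape, with hypothetical names:

    struct Example {
        std::mutex guard;    // used by the worker thread
        std::jthread worker; // declared last: stopped and joined first
    };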
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
index 8b86ad050..a8c4b4415 100644
--- a/src/video_core/cdma_pusher.cpp
+++ b/src/video_core/cdma_pusher.cpp
@@ -24,6 +24,7 @@
 #include "command_classes/vic.h"
 #include "video_core/cdma_pusher.h"
 #include "video_core/command_classes/nvdec_common.h"
+#include "video_core/command_classes/sync_manager.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
index 1bada44dd..87b49d6ea 100644
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -9,13 +9,13 @@
 
 #include "common/bit_field.h"
 #include "common/common_types.h"
-#include "video_core/command_classes/sync_manager.h"
 
 namespace Tegra {
 
 class GPU;
 class Host1x;
 class Nvdec;
+class SyncptIncrManager;
 class Vic;
 
 enum class ChSubmissionMode : u32 {
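Note: these two hunks are a compile-time decoupling: cdma_pusher.h drops the sync_manager.h include in favor of a forward declaration, and cdma_pusher.cpp picks up the include where the complete type is actually used. The general pattern:

    // header: a forward declaration suffices for pointer/reference members
    // (and for unique_ptr members with an out-of-line destructor) and for
    // function signatures.
    namespace Tegra {
    class SyncptIncrManager;
    }

    // source file: include the full definition only where members are called.
    #include "video_core/command_classes/sync_manager.h"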
diff --git a/src/video_core/framebuffer_config.h b/src/video_core/framebuffer_config.h
index b86c3a757..b1d455e30 100644
--- a/src/video_core/framebuffer_config.h
+++ b/src/video_core/framebuffer_config.h
@@ -4,8 +4,10 @@
 
 #pragma once
 
-namespace Tegra {
+#include "common/common_types.h"
+#include "common/math_util.h"
 
+namespace Tegra {
 /**
  * Struct describing framebuffer configuration
  */
@@ -16,6 +18,21 @@ struct FramebufferConfig {
         B8G8R8A8_UNORM = 5,
     };
 
+    enum class TransformFlags : u32 {
+        /// No transform flags are set
+        Unset = 0x00,
+        /// Flip source image horizontally (around the vertical axis)
+        FlipH = 0x01,
+        /// Flip source image vertically (around the horizontal axis)
+        FlipV = 0x02,
+        /// Rotate source image 90 degrees clockwise
+        Rotate90 = 0x04,
+        /// Rotate source image 180 degrees
+        Rotate180 = 0x03,
+        /// Rotate source image 270 degrees clockwise
+        Rotate270 = 0x07,
+    };
+
     VAddr address{};
     u32 offset{};
     u32 width{};
@@ -23,7 +40,6 @@ struct FramebufferConfig {
     u32 stride{};
     PixelFormat pixel_format{};
 
-    using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags;
     TransformFlags transform_flags{};
     Common::Rectangle<int> crop_rect;
 };
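Note: the flag values are not arbitrary: they follow the buffer-queue transform encoding that this enum replaces (the removed alias pointed at Service::NVFlinger::BufferQueue::BufferTransformFlags), where rotations compose from the flip bits. A quick check of the arithmetic:

    // Rotate180 (0x03) == FlipH (0x01) | FlipV (0x02);
    // Rotate270 (0x07) == Rotate90 (0x04) | Rotate180 (0x03).
    static_assert((0x01 | 0x02) == 0x03);
    static_assert((0x04 | 0x03) == 0x07);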
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 2ae3639b5..ab7c21a49 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -2,540 +2,913 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <array>
+#include <atomic>
 #include <chrono>
+#include <condition_variable>
+#include <list>
+#include <memory>
 
 #include "common/assert.h"
 #include "common/microprofile.h"
 #include "common/settings.h"
 #include "core/core.h"
 #include "core/core_timing.h"
-#include "core/core_timing_util.h"
 #include "core/frontend/emu_window.h"
 #include "core/hardware_interrupt_manager.h"
-#include "core/memory.h"
+#include "core/hle/service/nvdrv/nvdata.h"
+#include "core/hle/service/nvflinger/buffer_queue.h"
 #include "core/perf_stats.h"
+#include "video_core/cdma_pusher.h"
+#include "video_core/dma_pusher.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/kepler_memory.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/gpu.h"
+#include "video_core/gpu_thread.h"
 #include "video_core/memory_manager.h"
 #include "video_core/renderer_base.h"
 #include "video_core/shader_notify.h"
-#include "video_core/video_core.h"
 
 namespace Tegra {
 
 MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
 
-GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_)
-    : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)},
-      dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)}, use_nvdec{use_nvdec_},
-      maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
-      fermi_2d{std::make_unique<Engines::Fermi2D>()},
-      kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
-      maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
-      kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
-      shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
-      gpu_thread{system_, is_async_} {}
+struct GPU::Impl {
+    explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
+        : gpu{gpu_}, system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(
+                                          system)},
+          dma_pusher{std::make_unique<Tegra::DmaPusher>(system, gpu)}, use_nvdec{use_nvdec_},
+          maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
+          fermi_2d{std::make_unique<Engines::Fermi2D>()},
+          kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
+          maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
+          kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
+          shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
+          gpu_thread{system_, is_async_} {}
+
+    ~Impl() = default;
+
+    /// Binds a renderer to the GPU.
+    void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
+        renderer = std::move(renderer_);
+        rasterizer = renderer->ReadRasterizer();
+
+        memory_manager->BindRasterizer(rasterizer);
+        maxwell_3d->BindRasterizer(rasterizer);
+        fermi_2d->BindRasterizer(rasterizer);
+        kepler_compute->BindRasterizer(rasterizer);
+        maxwell_dma->BindRasterizer(rasterizer);
+    }
 
-GPU::~GPU() = default;
+    /// Calls a GPU method.
+    void CallMethod(const GPU::MethodCall& method_call) {
+        LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
+                  method_call.subchannel);
+
+        ASSERT(method_call.subchannel < bound_engines.size());
+
+        if (ExecuteMethodOnEngine(method_call.method)) {
+            CallEngineMethod(method_call);
+        } else {
+            CallPullerMethod(method_call);
+        }
+    }
+
+    /// Calls a GPU multivalue method.
+    void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+                         u32 methods_pending) {
+        LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
 
-void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
-    renderer = std::move(renderer_);
-    rasterizer = renderer->ReadRasterizer();
+        ASSERT(subchannel < bound_engines.size());
+
+        if (ExecuteMethodOnEngine(method)) {
+            CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
+        } else {
+            for (std::size_t i = 0; i < amount; i++) {
+                CallPullerMethod(GPU::MethodCall{
+                    method,
+                    base_start[i],
+                    subchannel,
+                    methods_pending - static_cast<u32>(i),
+                });
+            }
+        }
+    }
+
+    /// Flush all current written commands into the host GPU for execution.
+    void FlushCommands() {
+        rasterizer->FlushCommands();
+    }
+
+    /// Synchronizes CPU writes with Host GPU memory.
+    void SyncGuestHost() {
+        rasterizer->SyncGuestHost();
+    }
+
+    /// Signal the ending of command list.
+    void OnCommandListEnd() {
+        if (is_async) {
+            // This command only applies to asynchronous GPU mode
+            gpu_thread.OnCommandListEnd();
+        }
+    }
+
+    /// Request a host GPU memory flush from the CPU.
+    [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size) {
+        std::unique_lock lck{flush_request_mutex};
+        const u64 fence = ++last_flush_fence;
+        flush_requests.emplace_back(fence, addr, size);
+        return fence;
+    }
+
+    /// Obtains current flush request fence id.
+    [[nodiscard]] u64 CurrentFlushRequestFence() const {
+        return current_flush_fence.load(std::memory_order_relaxed);
+    }
+
+    /// Tick pending requests within the GPU.
+    void TickWork() {
+        std::unique_lock lck{flush_request_mutex};
+        while (!flush_requests.empty()) {
+            auto& request = flush_requests.front();
+            const u64 fence = request.fence;
+            const VAddr addr = request.addr;
+            const std::size_t size = request.size;
+            flush_requests.pop_front();
+            flush_request_mutex.unlock();
+            rasterizer->FlushRegion(addr, size);
+            current_flush_fence.store(fence);
+            flush_request_mutex.lock();
+        }
+    }
+
+    /// Returns a reference to the Maxwell3D GPU engine.
+    [[nodiscard]] Engines::Maxwell3D& Maxwell3D() {
+        return *maxwell_3d;
+    }
+
+    /// Returns a const reference to the Maxwell3D GPU engine.
+    [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const {
+        return *maxwell_3d;
+    }
+
+    /// Returns a reference to the KeplerCompute GPU engine.
+    [[nodiscard]] Engines::KeplerCompute& KeplerCompute() {
+        return *kepler_compute;
+    }
+
+    /// Returns a reference to the KeplerCompute GPU engine.
+    [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const {
+        return *kepler_compute;
+    }
+
+    /// Returns a reference to the GPU memory manager.
+    [[nodiscard]] Tegra::MemoryManager& MemoryManager() {
+        return *memory_manager;
+    }
+
+    /// Returns a const reference to the GPU memory manager.
+    [[nodiscard]] const Tegra::MemoryManager& MemoryManager() const {
+        return *memory_manager;
+    }
+
+    /// Returns a reference to the GPU DMA pusher.
+    [[nodiscard]] Tegra::DmaPusher& DmaPusher() {
+        return *dma_pusher;
+    }
+
+    /// Returns a const reference to the GPU DMA pusher.
+    [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const {
+        return *dma_pusher;
+    }
+
+    /// Returns a reference to the GPU CDMA pusher.
+    [[nodiscard]] Tegra::CDmaPusher& CDmaPusher() {
+        return *cdma_pusher;
+    }
+
+    /// Returns a const reference to the GPU CDMA pusher.
+    [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const {
+        return *cdma_pusher;
+    }
+
+    /// Returns a reference to the underlying renderer.
+    [[nodiscard]] VideoCore::RendererBase& Renderer() {
+        return *renderer;
+    }
+
+    /// Returns a const reference to the underlying renderer.
+    [[nodiscard]] const VideoCore::RendererBase& Renderer() const {
+        return *renderer;
+    }
+
+    /// Returns a reference to the shader notifier.
+    [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() {
+        return *shader_notify;
+    }
+
+    /// Returns a const reference to the shader notifier.
+    [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const {
+        return *shader_notify;
+    }
+
+    /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
+    void WaitFence(u32 syncpoint_id, u32 value) {
+        // Synced GPU, is always in sync
+        if (!is_async) {
+            return;
+        }
+        if (syncpoint_id == UINT32_MAX) {
+            // TODO: Research what this does.
+            LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented");
+            return;
+        }
+        MICROPROFILE_SCOPE(GPU_wait);
+        std::unique_lock lock{sync_mutex};
+        sync_cv.wait(lock, [=, this] {
+            if (shutting_down.load(std::memory_order_relaxed)) {
+                // We're shutting down, ensure no threads continue to wait for the next syncpoint
+                return true;
+            }
+            return syncpoints.at(syncpoint_id).load() >= value;
+        });
+    }
+
+    void IncrementSyncPoint(u32 syncpoint_id) {
+        auto& syncpoint = syncpoints.at(syncpoint_id);
+        syncpoint++;
+        std::lock_guard lock{sync_mutex};
+        sync_cv.notify_all();
+        auto& interrupt = syncpt_interrupts.at(syncpoint_id);
+        if (!interrupt.empty()) {
+            u32 value = syncpoint.load();
+            auto it = interrupt.begin();
+            while (it != interrupt.end()) {
+                if (value >= *it) {
+                    TriggerCpuInterrupt(syncpoint_id, *it);
+                    it = interrupt.erase(it);
+                    continue;
+                }
+                it++;
+            }
+        }
+    }
+
+    [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const {
+        return syncpoints.at(syncpoint_id).load();
+    }
+
+    void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
+        std::lock_guard lock{sync_mutex};
+        auto& interrupt = syncpt_interrupts.at(syncpoint_id);
+        bool contains = std::any_of(interrupt.begin(), interrupt.end(),
+                                    [value](u32 in_value) { return in_value == value; });
+        if (contains) {
+            return;
+        }
+        interrupt.emplace_back(value);
+    }
+
+    [[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
+        std::lock_guard lock{sync_mutex};
+        auto& interrupt = syncpt_interrupts.at(syncpoint_id);
+        const auto iter =
+            std::find_if(interrupt.begin(), interrupt.end(),
+                         [value](u32 interrupt_value) { return value == interrupt_value; });
+
+        if (iter == interrupt.end()) {
+            return false;
+        }
+        interrupt.erase(iter);
+        return true;
+    }
+
+    [[nodiscard]] u64 GetTicks() const {
+        // This values were reversed engineered by fincs from NVN
+        // The gpu clock is reported in units of 385/625 nanoseconds
+        constexpr u64 gpu_ticks_num = 384;
+        constexpr u64 gpu_ticks_den = 625;
+
+        u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();
+        if (Settings::values.use_fast_gpu_time.GetValue()) {
+            nanoseconds /= 256;
+        }
+        const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
+        const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
+        return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
+    }
+
+    [[nodiscard]] bool IsAsync() const {
+        return is_async;
+    }
+
+    [[nodiscard]] bool UseNvdec() const {
+        return use_nvdec;
+    }
+
+    void RendererFrameEndNotify() {
+        system.GetPerfStats().EndGameFrame();
+    }
+
+    /// Performs any additional setup necessary in order to begin GPU emulation.
+    /// This can be used to launch any necessary threads and register any necessary
+    /// core timing events.
+    void Start() {
+        gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher);
+        cpu_context = renderer->GetRenderWindow().CreateSharedContext();
+        cpu_context->MakeCurrent();
+    }
+
+    /// Obtain the CPU Context
+    void ObtainContext() {
+        cpu_context->MakeCurrent();
+    }
+
+    /// Release the CPU Context
+    void ReleaseContext() {
+        cpu_context->DoneCurrent();
+    }
+
+    /// Push GPU command entries to be processed
+    void PushGPUEntries(Tegra::CommandList&& entries) {
+        gpu_thread.SubmitList(std::move(entries));
+    }
+
+    /// Push GPU command buffer entries to be processed
+    void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) {
+        if (!use_nvdec) {
+            return;
+        }
+
+        if (!cdma_pusher) {
+            cdma_pusher = std::make_unique<Tegra::CDmaPusher>(gpu);
+        }
+
+        // SubmitCommandBuffer would make the nvdec operations async, this is not currently working
+        // TODO(ameerj): RE proper async nvdec operation
+        // gpu_thread.SubmitCommandBuffer(std::move(entries));
+
+        cdma_pusher->ProcessEntries(std::move(entries));
+    }
+
+    /// Frees the CDMAPusher instance to free up resources
+    void ClearCdmaInstance() {
+        cdma_pusher.reset();
+    }
+
+    /// Swap buffers (render frame)
+    void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
+        gpu_thread.SwapBuffers(framebuffer);
+    }
+
+    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
+    void FlushRegion(VAddr addr, u64 size) {
+        gpu_thread.FlushRegion(addr, size);
+    }
+
+    /// Notify rasterizer that any caches of the specified region should be invalidated
+    void InvalidateRegion(VAddr addr, u64 size) {
+        gpu_thread.InvalidateRegion(addr, size);
+    }
+
+    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) {
+        gpu_thread.FlushAndInvalidateRegion(addr, size);
+    }
+
+    void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const {
+        auto& interrupt_manager = system.InterruptManager();
+        interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
+    }
+
+    void ProcessBindMethod(const GPU::MethodCall& method_call) {
+        // Bind the current subchannel to the desired engine id.
+        LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
+                  method_call.argument);
+        const auto engine_id = static_cast<EngineID>(method_call.argument);
+        bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
+        switch (engine_id) {
+        case EngineID::FERMI_TWOD_A:
+            dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel);
+            break;
+        case EngineID::MAXWELL_B:
+            dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel);
+            break;
+        case EngineID::KEPLER_COMPUTE_B:
+            dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel);
+            break;
+        case EngineID::MAXWELL_DMA_COPY_A:
+            dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel);
+            break;
+        case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+            dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
+            break;
+        default:
+            UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
+        }
+    }
 
-    memory_manager->BindRasterizer(rasterizer);
-    maxwell_3d->BindRasterizer(rasterizer);
-    fermi_2d->BindRasterizer(rasterizer);
-    kepler_compute->BindRasterizer(rasterizer);
-    maxwell_dma->BindRasterizer(rasterizer);
+    void ProcessFenceActionMethod() {
+        switch (regs.fence_action.op) {
+        case GPU::FenceOperation::Acquire:
+            WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
+            break;
+        case GPU::FenceOperation::Increment:
+            IncrementSyncPoint(regs.fence_action.syncpoint_id);
+            break;
+        default:
+            UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
+        }
+    }
+
+    void ProcessWaitForInterruptMethod() {
+        // TODO(bunnei) ImplementMe
+        LOG_WARNING(HW_GPU, "(STUBBED) called");
+    }
+
+    void ProcessSemaphoreTriggerMethod() {
+        const auto semaphoreOperationMask = 0xF;
+        const auto op =
+            static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
+        if (op == GpuSemaphoreOperation::WriteLong) {
+            struct Block {
+                u32 sequence;
+                u32 zeros = 0;
+                u64 timestamp;
+            };
+
+            Block block{};
+            block.sequence = regs.semaphore_sequence;
+            // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
+            // CoreTiming
+            block.timestamp = GetTicks();
+            memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
+                                       sizeof(block));
+        } else {
+            const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
+            if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
+                (op == GpuSemaphoreOperation::AcquireGequal &&
+                 static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
+                (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
+                // Nothing to do in this case
+            } else {
+                regs.acquire_source = true;
+                regs.acquire_value = regs.semaphore_sequence;
+                if (op == GpuSemaphoreOperation::AcquireEqual) {
+                    regs.acquire_active = true;
+                    regs.acquire_mode = false;
+                } else if (op == GpuSemaphoreOperation::AcquireGequal) {
+                    regs.acquire_active = true;
+                    regs.acquire_mode = true;
+                } else if (op == GpuSemaphoreOperation::AcquireMask) {
+                    // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
+                    // semaphore_sequence, gives a non-0 result
+                    LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
+                } else {
+                    LOG_ERROR(HW_GPU, "Invalid semaphore operation");
+                }
+            }
+        }
+    }
+
+    void ProcessSemaphoreRelease() {
+        memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(),
+                                   regs.semaphore_release);
+    }
+
+    void ProcessSemaphoreAcquire() {
+        const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress());
+        const auto value = regs.semaphore_acquire;
+        if (word != value) {
+            regs.acquire_active = true;
+            regs.acquire_value = value;
+            // TODO(kemathe73) figure out how to do the acquire_timeout
+            regs.acquire_mode = false;
+            regs.acquire_source = false;
+        }
+    }
+
+    /// Calls a GPU puller method.
+    void CallPullerMethod(const GPU::MethodCall& method_call) {
+        regs.reg_array[method_call.method] = method_call.argument;
+        const auto method = static_cast<BufferMethods>(method_call.method);
+
+        switch (method) {
+        case BufferMethods::BindObject: {
+            ProcessBindMethod(method_call);
+            break;
+        }
+        case BufferMethods::Nop:
+        case BufferMethods::SemaphoreAddressHigh:
+        case BufferMethods::SemaphoreAddressLow:
+        case BufferMethods::SemaphoreSequence:
+        case BufferMethods::UnkCacheFlush:
+        case BufferMethods::WrcacheFlush:
+        case BufferMethods::FenceValue:
+            break;
+        case BufferMethods::RefCnt:
+            rasterizer->SignalReference();
+            break;
+        case BufferMethods::FenceAction:
+            ProcessFenceActionMethod();
+            break;
+        case BufferMethods::WaitForInterrupt:
+            ProcessWaitForInterruptMethod();
+            break;
+        case BufferMethods::SemaphoreTrigger: {
+            ProcessSemaphoreTriggerMethod();
+            break;
+        }
+        case BufferMethods::NotifyIntr: {
+            // TODO(Kmather73): Research and implement this method.
+            LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
+            break;
+        }
+        case BufferMethods::Unk28: {
+            // TODO(Kmather73): Research and implement this method.
+            LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
+            break;
+        }
+        case BufferMethods::SemaphoreAcquire: {
+            ProcessSemaphoreAcquire();
+            break;
+        }
+        case BufferMethods::SemaphoreRelease: {
+            ProcessSemaphoreRelease();
+            break;
+        }
+        case BufferMethods::Yield: {
+            // TODO(Kmather73): Research and implement this method.
+            LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
+            break;
+        }
+        default:
+            LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
+            break;
+        }
+    }
+
+    /// Calls a GPU engine method.
+    void CallEngineMethod(const GPU::MethodCall& method_call) {
+        const EngineID engine = bound_engines[method_call.subchannel];
+
+        switch (engine) {
+        case EngineID::FERMI_TWOD_A:
+            fermi_2d->CallMethod(method_call.method, method_call.argument,
+                                 method_call.IsLastCall());
+            break;
+        case EngineID::MAXWELL_B:
+            maxwell_3d->CallMethod(method_call.method, method_call.argument,
+                                   method_call.IsLastCall());
+            break;
+        case EngineID::KEPLER_COMPUTE_B:
+            kepler_compute->CallMethod(method_call.method, method_call.argument,
+                                       method_call.IsLastCall());
+            break;
+        case EngineID::MAXWELL_DMA_COPY_A:
+            maxwell_dma->CallMethod(method_call.method, method_call.argument,
+                                    method_call.IsLastCall());
+            break;
+        case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+            kepler_memory->CallMethod(method_call.method, method_call.argument,
+                                      method_call.IsLastCall());
+            break;
+        default:
+            UNIMPLEMENTED_MSG("Unimplemented engine");
+        }
+    }
+
+    /// Calls a GPU engine multivalue method.
+    void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+                               u32 methods_pending) {
+        const EngineID engine = bound_engines[subchannel];
+
+        switch (engine) {
+        case EngineID::FERMI_TWOD_A:
+            fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
+            break;
+        case EngineID::MAXWELL_B:
+            maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
+            break;
+        case EngineID::KEPLER_COMPUTE_B:
+            kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
+            break;
+        case EngineID::MAXWELL_DMA_COPY_A:
+            maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
+            break;
+        case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+            kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
+            break;
+        default:
+            UNIMPLEMENTED_MSG("Unimplemented engine");
+        }
+    }
+
+    /// Determines where the method should be executed.
+    [[nodiscard]] bool ExecuteMethodOnEngine(u32 method) {
+        const auto buffer_method = static_cast<BufferMethods>(method);
+        return buffer_method >= BufferMethods::NonPullerMethods;
+    }
+
+    struct Regs {
+        static constexpr size_t NUM_REGS = 0x40;
+
+        union {
+            struct {
+                INSERT_PADDING_WORDS_NOINIT(0x4);
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+
+                    [[nodiscard]] GPUVAddr SemaphoreAddress() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } semaphore_address;
+
+                u32 semaphore_sequence;
+                u32 semaphore_trigger;
+                INSERT_PADDING_WORDS_NOINIT(0xC);
+
+                // The pusher and the puller share the reference counter, the pusher only has read
+                // access
+                u32 reference_count;
+                INSERT_PADDING_WORDS_NOINIT(0x5);
+
+                u32 semaphore_acquire;
+                u32 semaphore_release;
+                u32 fence_value;
+                GPU::FenceAction fence_action;
+                INSERT_PADDING_WORDS_NOINIT(0xE2);
+
+                // Puller state
+                u32 acquire_mode;
+                u32 acquire_source;
+                u32 acquire_active;
+                u32 acquire_timeout;
+                u32 acquire_value;
+            };
+            std::array<u32, NUM_REGS> reg_array;
+        };
+    } regs{};
+
+    GPU& gpu;
+    Core::System& system;
+    std::unique_ptr<Tegra::MemoryManager> memory_manager;
+    std::unique_ptr<Tegra::DmaPusher> dma_pusher;
+    std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
+    std::unique_ptr<VideoCore::RendererBase> renderer;
+    VideoCore::RasterizerInterface* rasterizer = nullptr;
+    const bool use_nvdec;
+
+    /// Mapping of command subchannels to their bound engine ids
+    std::array<EngineID, 8> bound_engines{};
+    /// 3D engine
+    std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
+    /// 2D engine
+    std::unique_ptr<Engines::Fermi2D> fermi_2d;
+    /// Compute engine
+    std::unique_ptr<Engines::KeplerCompute> kepler_compute;
+    /// DMA engine
+    std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
+    /// Inline memory engine
+    std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+    /// Shader build notifier
+    std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
+    /// When true, we are about to shut down emulation session, so terminate outstanding tasks
+    std::atomic_bool shutting_down{};
+
+    std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
+
+    std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;
+
+    std::mutex sync_mutex;
+    std::mutex device_mutex;
+
+    std::condition_variable sync_cv;
+
+    struct FlushRequest {
+        explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_)
+            : fence{fence_}, addr{addr_}, size{size_} {}
+        u64 fence;
+        VAddr addr;
+        std::size_t size;
+    };
+
+    std::list<FlushRequest> flush_requests;
+    std::atomic<u64> current_flush_fence{};
+    u64 last_flush_fence{};
+    std::mutex flush_request_mutex;
+
+    const bool is_async;
+
+    VideoCommon::GPUThread::ThreadManager gpu_thread;
+    std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
+
+#define ASSERT_REG_POSITION(field_name, position)                                                 \
+    static_assert(offsetof(Regs, field_name) == position * 4,                                     \
+                  "Field " #field_name " has invalid position")
+
+    ASSERT_REG_POSITION(semaphore_address, 0x4);
+    ASSERT_REG_POSITION(semaphore_sequence, 0x6);
+    ASSERT_REG_POSITION(semaphore_trigger, 0x7);
+    ASSERT_REG_POSITION(reference_count, 0x14);
+    ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
+    ASSERT_REG_POSITION(semaphore_release, 0x1B);
+    ASSERT_REG_POSITION(fence_value, 0x1C);
+    ASSERT_REG_POSITION(fence_action, 0x1D);
+
+    ASSERT_REG_POSITION(acquire_mode, 0x100);
+    ASSERT_REG_POSITION(acquire_source, 0x101);
+    ASSERT_REG_POSITION(acquire_active, 0x102);
+    ASSERT_REG_POSITION(acquire_timeout, 0x103);
+    ASSERT_REG_POSITION(acquire_value, 0x104);
+
+#undef ASSERT_REG_POSITION
+
+    enum class GpuSemaphoreOperation {
+        AcquireEqual = 0x1,
+        WriteLong = 0x2,
+        AcquireGequal = 0x4,
+        AcquireMask = 0x8,
+    };
+};
+
+GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
+    : impl{std::make_unique<Impl>(*this, system, is_async, use_nvdec)} {}
+
+GPU::~GPU() = default;
+
+void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) {
+    impl->BindRenderer(std::move(renderer));
| 54 | } | 747 | } |
| 55 | 748 | ||
| 56 | Engines::Maxwell3D& GPU::Maxwell3D() { | 749 | void GPU::CallMethod(const MethodCall& method_call) { |
| 57 | return *maxwell_3d; | 750 | impl->CallMethod(method_call); |
| 58 | } | 751 | } |
| 59 | 752 | ||
| 60 | const Engines::Maxwell3D& GPU::Maxwell3D() const { | 753 | void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, |
| 61 | return *maxwell_3d; | 754 | u32 methods_pending) { |
| 755 | impl->CallMultiMethod(method, subchannel, base_start, amount, methods_pending); | ||
| 62 | } | 756 | } |
| 63 | 757 | ||
| 64 | Engines::KeplerCompute& GPU::KeplerCompute() { | 758 | void GPU::FlushCommands() { |
| 65 | return *kepler_compute; | 759 | impl->FlushCommands(); |
| 66 | } | 760 | } |
| 67 | 761 | ||
| 68 | const Engines::KeplerCompute& GPU::KeplerCompute() const { | 762 | void GPU::SyncGuestHost() { |
| 69 | return *kepler_compute; | 763 | impl->SyncGuestHost(); |
| 70 | } | 764 | } |
| 71 | 765 | ||
| 72 | MemoryManager& GPU::MemoryManager() { | 766 | void GPU::OnCommandListEnd() { |
| 73 | return *memory_manager; | 767 | impl->OnCommandListEnd(); |
| 74 | } | 768 | } |
| 75 | 769 | ||
| 76 | const MemoryManager& GPU::MemoryManager() const { | 770 | u64 GPU::RequestFlush(VAddr addr, std::size_t size) { |
| 77 | return *memory_manager; | 771 | return impl->RequestFlush(addr, size); |
| 78 | } | 772 | } |
| 79 | 773 | ||
| 80 | DmaPusher& GPU::DmaPusher() { | 774 | u64 GPU::CurrentFlushRequestFence() const { |
| 81 | return *dma_pusher; | 775 | return impl->CurrentFlushRequestFence(); |
| 82 | } | 776 | } |
| 83 | 777 | ||
| 84 | Tegra::CDmaPusher& GPU::CDmaPusher() { | 778 | void GPU::TickWork() { |
| 85 | return *cdma_pusher; | 779 | impl->TickWork(); |
| 86 | } | 780 | } |
| 87 | 781 | ||
| 88 | const DmaPusher& GPU::DmaPusher() const { | 782 | Engines::Maxwell3D& GPU::Maxwell3D() { |
| 89 | return *dma_pusher; | 783 | return impl->Maxwell3D(); |
| 90 | } | 784 | } |
| 91 | 785 | ||
| 92 | const Tegra::CDmaPusher& GPU::CDmaPusher() const { | 786 | const Engines::Maxwell3D& GPU::Maxwell3D() const { |
| 93 | return *cdma_pusher; | 787 | return impl->Maxwell3D(); |
| 94 | } | 788 | } |
| 95 | 789 | ||
| 96 | void GPU::WaitFence(u32 syncpoint_id, u32 value) { | 790 | Engines::KeplerCompute& GPU::KeplerCompute() { |
| 97 | // Synced GPU, is always in sync | 791 | return impl->KeplerCompute(); |
| 98 | if (!is_async) { | ||
| 99 | return; | ||
| 100 | } | ||
| 101 | if (syncpoint_id == UINT32_MAX) { | ||
| 102 | // TODO: Research what this does. | ||
| 103 | LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented"); | ||
| 104 | return; | ||
| 105 | } | ||
| 106 | MICROPROFILE_SCOPE(GPU_wait); | ||
| 107 | std::unique_lock lock{sync_mutex}; | ||
| 108 | sync_cv.wait(lock, [=, this] { | ||
| 109 | if (shutting_down.load(std::memory_order_relaxed)) { | ||
| 110 | // We're shutting down, ensure no threads continue to wait for the next syncpoint | ||
| 111 | return true; | ||
| 112 | } | ||
| 113 | return syncpoints.at(syncpoint_id).load() >= value; | ||
| 114 | }); | ||
| 115 | } | ||
| 116 | |||
| 117 | void GPU::IncrementSyncPoint(const u32 syncpoint_id) { | ||
| 118 | auto& syncpoint = syncpoints.at(syncpoint_id); | ||
| 119 | syncpoint++; | ||
| 120 | std::lock_guard lock{sync_mutex}; | ||
| 121 | sync_cv.notify_all(); | ||
| 122 | auto& interrupt = syncpt_interrupts.at(syncpoint_id); | ||
| 123 | if (!interrupt.empty()) { | ||
| 124 | u32 value = syncpoint.load(); | ||
| 125 | auto it = interrupt.begin(); | ||
| 126 | while (it != interrupt.end()) { | ||
| 127 | if (value >= *it) { | ||
| 128 | TriggerCpuInterrupt(syncpoint_id, *it); | ||
| 129 | it = interrupt.erase(it); | ||
| 130 | continue; | ||
| 131 | } | ||
| 132 | it++; | ||
| 133 | } | ||
| 134 | } | ||
| 135 | } | 792 | } |
| 136 | 793 | ||
| 137 | u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const { | 794 | const Engines::KeplerCompute& GPU::KeplerCompute() const { |
| 138 | return syncpoints.at(syncpoint_id).load(); | 795 | return impl->KeplerCompute(); |
| 139 | } | 796 | } |
| 140 | 797 | ||
| 141 | void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) { | 798 | Tegra::MemoryManager& GPU::MemoryManager() { |
| 142 | auto& interrupt = syncpt_interrupts.at(syncpoint_id); | 799 | return impl->MemoryManager(); |
| 143 | bool contains = std::any_of(interrupt.begin(), interrupt.end(), | ||
| 144 | [value](u32 in_value) { return in_value == value; }); | ||
| 145 | if (contains) { | ||
| 146 | return; | ||
| 147 | } | ||
| 148 | interrupt.emplace_back(value); | ||
| 149 | } | 800 | } |
| 150 | 801 | ||
| 151 | bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { | 802 | const Tegra::MemoryManager& GPU::MemoryManager() const { |
| 152 | std::lock_guard lock{sync_mutex}; | 803 | return impl->MemoryManager(); |
| 153 | auto& interrupt = syncpt_interrupts.at(syncpoint_id); | 804 | } |
| 154 | const auto iter = | ||
| 155 | std::find_if(interrupt.begin(), interrupt.end(), | ||
| 156 | [value](u32 interrupt_value) { return value == interrupt_value; }); | ||
| 157 | 805 | ||
| 158 | if (iter == interrupt.end()) { | 806 | Tegra::DmaPusher& GPU::DmaPusher() { |
| 159 | return false; | 807 | return impl->DmaPusher(); |
| 160 | } | ||
| 161 | interrupt.erase(iter); | ||
| 162 | return true; | ||
| 163 | } | 808 | } |
| 164 | 809 | ||
| 165 | u64 GPU::RequestFlush(VAddr addr, std::size_t size) { | 810 | const Tegra::DmaPusher& GPU::DmaPusher() const { |
| 166 | std::unique_lock lck{flush_request_mutex}; | 811 | return impl->DmaPusher(); |
| 167 | const u64 fence = ++last_flush_fence; | ||
| 168 | flush_requests.emplace_back(fence, addr, size); | ||
| 169 | return fence; | ||
| 170 | } | 812 | } |
| 171 | 813 | ||
| 172 | void GPU::TickWork() { | 814 | Tegra::CDmaPusher& GPU::CDmaPusher() { |
| 173 | std::unique_lock lck{flush_request_mutex}; | 815 | return impl->CDmaPusher(); |
| 174 | while (!flush_requests.empty()) { | ||
| 175 | auto& request = flush_requests.front(); | ||
| 176 | const u64 fence = request.fence; | ||
| 177 | const VAddr addr = request.addr; | ||
| 178 | const std::size_t size = request.size; | ||
| 179 | flush_requests.pop_front(); | ||
| 180 | flush_request_mutex.unlock(); | ||
| 181 | rasterizer->FlushRegion(addr, size); | ||
| 182 | current_flush_fence.store(fence); | ||
| 183 | flush_request_mutex.lock(); | ||
| 184 | } | ||
| 185 | } | 816 | } |
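RequestFlush and TickWork (left) implement a fenced work queue: callers enqueue a memory region and get back a monotonically increasing fence, and the GPU thread drains the queue, releasing the queue lock around the potentially slow rasterizer flush so new requests are never blocked behind it. A condensed sketch under those assumptions; `flush_region` stands in for the rasterizer call:

```cpp
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <deque>
#include <mutex>

struct FlushRequest {
    std::uint64_t fence;
    std::uintptr_t addr;
    std::size_t size;
};

class FlushQueue {
public:
    // Producer: enqueue a region; the returned fence can be polled later.
    std::uint64_t Request(std::uintptr_t addr, std::size_t size) {
        std::scoped_lock lock{mutex};
        const std::uint64_t fence = ++last_fence;
        requests.push_back({fence, addr, size});
        return fence;
    }

    // Consumer: drain the queue, dropping the lock across each slow flush.
    template <typename FlushRegion>
    void Tick(FlushRegion&& flush_region) {
        std::unique_lock lock{mutex};
        while (!requests.empty()) {
            const FlushRequest request = requests.front();
            requests.pop_front();
            lock.unlock();
            flush_region(request.addr, request.size);
            completed.store(request.fence);
            lock.lock();
        }
    }

    std::uint64_t CompletedFence() const {
        return completed.load(std::memory_order_relaxed);
    }

private:
    std::mutex mutex;
    std::deque<FlushRequest> requests;
    std::uint64_t last_fence = 0;
    std::atomic<std::uint64_t> completed{0};
};
```

Toggling the std::unique_lock object rather than the raw mutex (as the original does) keeps the unlock/lock pair exception-safe.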
| 186 | 817 | ||
| 187 | u64 GPU::GetTicks() const { | 818 | const Tegra::CDmaPusher& GPU::CDmaPusher() const { |
| 188 | // These values were reverse engineered by fincs from NVN | 819 | return impl->CDmaPusher(); |
| 189 | // The gpu clock is reported in units of 384/625 nanoseconds | 820 | }
| 190 | constexpr u64 gpu_ticks_num = 384; | ||
| 191 | constexpr u64 gpu_ticks_den = 625; | ||
| 192 | 821 | ||
| 193 | u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count(); | 822 | VideoCore::RendererBase& GPU::Renderer() { |
| 194 | if (Settings::values.use_fast_gpu_time.GetValue()) { | 823 | return impl->Renderer(); |
| 195 | nanoseconds /= 256; | ||
| 196 | } | ||
| 197 | const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; | ||
| 198 | const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den; | ||
| 199 | return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; | ||
| 200 | } | 824 | } |
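The quotient/remainder split in GetTicks keeps the multiplication by 384 from overflowing 64 bits on very long sessions (a direct `ns * 384` would overflow after roughly 555 days of nanoseconds). A standalone restatement of the arithmetic with one worked value:

```cpp
#include <cstdint>

// ticks = ns * 384 / 625, computed without intermediate overflow.
constexpr std::uint64_t GpuTicks(std::uint64_t ns) {
    constexpr std::uint64_t num = 384;
    constexpr std::uint64_t den = 625;
    return (ns / den) * num + (ns % den) * num / den;
}

// One emulated second of nanoseconds maps to 614.4 million GPU ticks.
static_assert(GpuTicks(1'000'000'000) == 614'400'000);
```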
| 201 | 825 | ||
| 202 | void GPU::RendererFrameEndNotify() { | 826 | const VideoCore::RendererBase& GPU::Renderer() const { |
| 203 | system.GetPerfStats().EndGameFrame(); | 827 | return impl->Renderer(); |
| 204 | } | 828 | } |
| 205 | 829 | ||
| 206 | void GPU::FlushCommands() { | 830 | VideoCore::ShaderNotify& GPU::ShaderNotify() { |
| 207 | rasterizer->FlushCommands(); | 831 | return impl->ShaderNotify(); |
| 208 | } | 832 | } |
| 209 | 833 | ||
| 210 | void GPU::SyncGuestHost() { | 834 | const VideoCore::ShaderNotify& GPU::ShaderNotify() const { |
| 211 | rasterizer->SyncGuestHost(); | 835 | return impl->ShaderNotify(); |
| 212 | } | 836 | } |
| 213 | 837 | ||
| 214 | enum class GpuSemaphoreOperation { | 838 | void GPU::WaitFence(u32 syncpoint_id, u32 value) { |
| 215 | AcquireEqual = 0x1, | 839 | impl->WaitFence(syncpoint_id, value); |
| 216 | WriteLong = 0x2, | 840 | } |
| 217 | AcquireGequal = 0x4, | ||
| 218 | AcquireMask = 0x8, | ||
| 219 | }; | ||
| 220 | 841 | ||
| 221 | void GPU::CallMethod(const MethodCall& method_call) { | 842 | void GPU::IncrementSyncPoint(u32 syncpoint_id) { |
| 222 | LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method, | 843 | impl->IncrementSyncPoint(syncpoint_id); |
| 223 | method_call.subchannel); | 844 | } |
| 224 | 845 | ||
| 225 | ASSERT(method_call.subchannel < bound_engines.size()); | 846 | u32 GPU::GetSyncpointValue(u32 syncpoint_id) const { |
| 847 | return impl->GetSyncpointValue(syncpoint_id); | ||
| 848 | } | ||
| 226 | 849 | ||
| 227 | if (ExecuteMethodOnEngine(method_call.method)) { | 850 | void GPU::RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) { |
| 228 | CallEngineMethod(method_call); | 851 | impl->RegisterSyncptInterrupt(syncpoint_id, value); |
| 229 | } else { | ||
| 230 | CallPullerMethod(method_call); | ||
| 231 | } | ||
| 232 | } | 852 | } |
| 233 | 853 | ||
| 234 | void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, | 854 | bool GPU::CancelSyncptInterrupt(u32 syncpoint_id, u32 value) { |
| 235 | u32 methods_pending) { | 855 | return impl->CancelSyncptInterrupt(syncpoint_id, value); |
| 236 | LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel); | ||
| 237 | |||
| 238 | ASSERT(subchannel < bound_engines.size()); | ||
| 239 | |||
| 240 | if (ExecuteMethodOnEngine(method)) { | ||
| 241 | CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending); | ||
| 242 | } else { | ||
| 243 | for (std::size_t i = 0; i < amount; i++) { | ||
| 244 | CallPullerMethod(MethodCall{ | ||
| 245 | method, | ||
| 246 | base_start[i], | ||
| 247 | subchannel, | ||
| 248 | methods_pending - static_cast<u32>(i), | ||
| 249 | }); | ||
| 250 | } | ||
| 251 | } | ||
| 252 | } | 856 | } |
| 253 | 857 | ||
| 254 | bool GPU::ExecuteMethodOnEngine(u32 method) { | 858 | u64 GPU::GetTicks() const { |
| 255 | const auto buffer_method = static_cast<BufferMethods>(method); | 859 | return impl->GetTicks(); |
| 256 | return buffer_method >= BufferMethods::NonPullerMethods; | 860 | } |
| 257 | } | ||
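ExecuteMethodOnEngine encodes the routing rule used by both CallMethod and CallMultiMethod above: method numbers below a threshold are handled by the puller itself (they index its register file), and everything at or above it is forwarded to the engine bound to the subchannel. Reduced to its essentials, with the cutoff value stated as an assumption:

```cpp
#include <cstdint>

enum class BufferMethods : std::uint32_t {
    BindObject = 0x0,
    // ... other puller-handled methods live below the cutoff ...
    NonPullerMethods = 0x40, // assumed: one past the puller register file
};

constexpr bool ExecuteOnEngine(std::uint32_t method) {
    return static_cast<BufferMethods>(method) >= BufferMethods::NonPullerMethods;
}

static_assert(!ExecuteOnEngine(0x00)); // BindObject -> puller
static_assert(ExecuteOnEngine(0x6C0)); // large method number -> bound engine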
| 258 | |||
| 259 | void GPU::CallPullerMethod(const MethodCall& method_call) { | ||
| 260 | regs.reg_array[method_call.method] = method_call.argument; | ||
| 261 | const auto method = static_cast<BufferMethods>(method_call.method); | ||
| 262 | |||
| 263 | switch (method) { | ||
| 264 | case BufferMethods::BindObject: { | ||
| 265 | ProcessBindMethod(method_call); | ||
| 266 | break; | ||
| 267 | } | ||
| 268 | case BufferMethods::Nop: | ||
| 269 | case BufferMethods::SemaphoreAddressHigh: | ||
| 270 | case BufferMethods::SemaphoreAddressLow: | ||
| 271 | case BufferMethods::SemaphoreSequence: | ||
| 272 | case BufferMethods::UnkCacheFlush: | ||
| 273 | case BufferMethods::WrcacheFlush: | ||
| 274 | case BufferMethods::FenceValue: | ||
| 275 | break; | ||
| 276 | case BufferMethods::RefCnt: | ||
| 277 | rasterizer->SignalReference(); | ||
| 278 | break; | ||
| 279 | case BufferMethods::FenceAction: | ||
| 280 | ProcessFenceActionMethod(); | ||
| 281 | break; | ||
| 282 | case BufferMethods::WaitForInterrupt: | ||
| 283 | ProcessWaitForInterruptMethod(); | ||
| 284 | break; | ||
| 285 | case BufferMethods::SemaphoreTrigger: { | ||
| 286 | ProcessSemaphoreTriggerMethod(); | ||
| 287 | break; | ||
| 288 | } | ||
| 289 | case BufferMethods::NotifyIntr: { | ||
| 290 | // TODO(Kmather73): Research and implement this method. | ||
| 291 | LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); | ||
| 292 | break; | ||
| 293 | } | ||
| 294 | case BufferMethods::Unk28: { | ||
| 295 | // TODO(Kmather73): Research and implement this method. | ||
| 296 | LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); | ||
| 297 | break; | ||
| 298 | } | ||
| 299 | case BufferMethods::SemaphoreAcquire: { | ||
| 300 | ProcessSemaphoreAcquire(); | ||
| 301 | break; | ||
| 302 | } | ||
| 303 | case BufferMethods::SemaphoreRelease: { | ||
| 304 | ProcessSemaphoreRelease(); | ||
| 305 | break; | ||
| 306 | } | ||
| 307 | case BufferMethods::Yield: { | ||
| 308 | // TODO(Kmather73): Research and implement this method. | ||
| 309 | LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented"); | ||
| 310 | break; | ||
| 311 | } | ||
| 312 | default: | ||
| 313 | LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method); | ||
| 314 | break; | ||
| 315 | } | ||
| 316 | } | ||
| 317 | |||
| 318 | void GPU::CallEngineMethod(const MethodCall& method_call) { | ||
| 319 | const EngineID engine = bound_engines[method_call.subchannel]; | ||
| 320 | |||
| 321 | switch (engine) { | ||
| 322 | case EngineID::FERMI_TWOD_A: | ||
| 323 | fermi_2d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); | ||
| 324 | break; | ||
| 325 | case EngineID::MAXWELL_B: | ||
| 326 | maxwell_3d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); | ||
| 327 | break; | ||
| 328 | case EngineID::KEPLER_COMPUTE_B: | ||
| 329 | kepler_compute->CallMethod(method_call.method, method_call.argument, | ||
| 330 | method_call.IsLastCall()); | ||
| 331 | break; | ||
| 332 | case EngineID::MAXWELL_DMA_COPY_A: | ||
| 333 | maxwell_dma->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); | ||
| 334 | break; | ||
| 335 | case EngineID::KEPLER_INLINE_TO_MEMORY_B: | ||
| 336 | kepler_memory->CallMethod(method_call.method, method_call.argument, | ||
| 337 | method_call.IsLastCall()); | ||
| 338 | break; | ||
| 339 | default: | ||
| 340 | UNIMPLEMENTED_MSG("Unimplemented engine"); | ||
| 341 | } | ||
| 342 | } | ||
| 343 | |||
| 344 | void GPU::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, | ||
| 345 | u32 methods_pending) { | ||
| 346 | const EngineID engine = bound_engines[subchannel]; | ||
| 347 | |||
| 348 | switch (engine) { | ||
| 349 | case EngineID::FERMI_TWOD_A: | ||
| 350 | fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 351 | break; | ||
| 352 | case EngineID::MAXWELL_B: | ||
| 353 | maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 354 | break; | ||
| 355 | case EngineID::KEPLER_COMPUTE_B: | ||
| 356 | kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 357 | break; | ||
| 358 | case EngineID::MAXWELL_DMA_COPY_A: | ||
| 359 | maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 360 | break; | ||
| 361 | case EngineID::KEPLER_INLINE_TO_MEMORY_B: | ||
| 362 | kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending); | ||
| 363 | break; | ||
| 364 | default: | ||
| 365 | UNIMPLEMENTED_MSG("Unimplemented engine"); | ||
| 366 | } | ||
| 367 | } | ||
| 368 | |||
| 369 | void GPU::ProcessBindMethod(const MethodCall& method_call) { | ||
| 370 | // Bind the current subchannel to the desired engine id. | ||
| 371 | LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, | ||
| 372 | method_call.argument); | ||
| 373 | const auto engine_id = static_cast<EngineID>(method_call.argument); | ||
| 374 | bound_engines[method_call.subchannel] = engine_id; | ||
| 375 | switch (engine_id) { | ||
| 376 | case EngineID::FERMI_TWOD_A: | ||
| 377 | dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel); | ||
| 378 | break; | ||
| 379 | case EngineID::MAXWELL_B: | ||
| 380 | dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel); | ||
| 381 | break; | ||
| 382 | case EngineID::KEPLER_COMPUTE_B: | ||
| 383 | dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel); | ||
| 384 | break; | ||
| 385 | case EngineID::MAXWELL_DMA_COPY_A: | ||
| 386 | dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel); | ||
| 387 | break; | ||
| 388 | case EngineID::KEPLER_INLINE_TO_MEMORY_B: | ||
| 389 | dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel); | ||
| 390 | break; | ||
| 391 | default: | ||
| 392 | UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id); | ||
| 393 | } | ||
| 394 | } | ||
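ProcessBindMethod fills a small per-channel table mapping each of the eight subchannels to an engine class, which the dispatch code above consults on every engine method. A sketch of that table; the class IDs mirror the EngineID values used in this file but should be treated as illustrative here:

```cpp
#include <array>
#include <cstdint>

enum class EngineID : std::uint32_t {
    FERMI_TWOD_A = 0x902D,
    KEPLER_INLINE_TO_MEMORY_B = 0xA140,
    MAXWELL_B = 0xB197,
    KEPLER_COMPUTE_B = 0xB1C0,
    MAXWELL_DMA_COPY_A = 0xB0B5,
};

class SubchannelTable {
public:
    // BindObject: remember which engine owns this subchannel.
    void Bind(std::uint32_t subchannel, std::uint32_t argument) {
        bound.at(subchannel) = static_cast<EngineID>(argument);
    }

    EngineID Engine(std::uint32_t subchannel) const {
        return bound.at(subchannel);
    }

private:
    std::array<EngineID, 8> bound{};
};
```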
| 395 | |||
| 396 | void GPU::ProcessFenceActionMethod() { | ||
| 397 | switch (regs.fence_action.op) { | ||
| 398 | case FenceOperation::Acquire: | ||
| 399 | WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); | ||
| 400 | break; | ||
| 401 | case FenceOperation::Increment: | ||
| 402 | IncrementSyncPoint(regs.fence_action.syncpoint_id); | ||
| 403 | break; | ||
| 404 | default: | ||
| 405 | UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value()); | ||
| 406 | } | ||
| 407 | } | ||
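ProcessFenceActionMethod consumes a packed FenceAction word: bit 0 selects acquire vs. increment and bits 8..31 carry the syncpoint id, matching the BitField layout declared in gpu.h later in this diff. A BitField-free sketch of the same packing:

```cpp
#include <cstdint>

enum class FenceOperation : std::uint32_t { Acquire = 0, Increment = 1 };

constexpr std::uint32_t Pack(FenceOperation op, std::uint32_t syncpoint_id) {
    return static_cast<std::uint32_t>(op) | (syncpoint_id << 8);
}

constexpr FenceOperation Op(std::uint32_t raw) {
    return static_cast<FenceOperation>(raw & 1); // BitField<0, 1, ...>
}

constexpr std::uint32_t SyncpointId(std::uint32_t raw) {
    return raw >> 8; // BitField<8, 24, u32>
}

static_assert(Op(Pack(FenceOperation::Increment, 5)) == FenceOperation::Increment);
static_assert(SyncpointId(Pack(FenceOperation::Increment, 5)) == 5);
```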
| 408 | |||
| 409 | void GPU::ProcessWaitForInterruptMethod() { | ||
| 410 | // TODO(bunnei) ImplementMe | ||
| 411 | LOG_WARNING(HW_GPU, "(STUBBED) called"); | ||
| 412 | } | ||
| 413 | |||
| 414 | void GPU::ProcessSemaphoreTriggerMethod() { | ||
| 415 | const auto semaphore_operation_mask = 0xF; | ||
| 416 | const auto op = | ||
| 417 | static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphore_operation_mask); | ||
| 418 | if (op == GpuSemaphoreOperation::WriteLong) { | ||
| 419 | struct Block { | ||
| 420 | u32 sequence; | ||
| 421 | u32 zeros = 0; | ||
| 422 | u64 timestamp; | ||
| 423 | }; | ||
| 424 | 861 | ||
| 425 | Block block{}; | 862 | bool GPU::IsAsync() const { |
| 426 | block.sequence = regs.semaphore_sequence; | 863 | return impl->IsAsync(); |
| 427 | // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of | ||
| 428 | // CoreTiming | ||
| 429 | block.timestamp = GetTicks(); | ||
| 430 | memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, | ||
| 431 | sizeof(block)); | ||
| 432 | } else { | ||
| 433 | const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())}; | ||
| 434 | if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) || | ||
| 435 | (op == GpuSemaphoreOperation::AcquireGequal && | ||
| 436 | static_cast<s32>(word - regs.semaphore_sequence) > 0) || | ||
| 437 | (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) { | ||
| 438 | // Nothing to do in this case | ||
| 439 | } else { | ||
| 440 | regs.acquire_source = true; | ||
| 441 | regs.acquire_value = regs.semaphore_sequence; | ||
| 442 | if (op == GpuSemaphoreOperation::AcquireEqual) { | ||
| 443 | regs.acquire_active = true; | ||
| 444 | regs.acquire_mode = false; | ||
| 445 | } else if (op == GpuSemaphoreOperation::AcquireGequal) { | ||
| 446 | regs.acquire_active = true; | ||
| 447 | regs.acquire_mode = true; | ||
| 448 | } else if (op == GpuSemaphoreOperation::AcquireMask) { | ||
| 449 | // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with | ||
| 450 | // semaphore_sequence, gives a non-0 result | ||
| 451 | LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented"); | ||
| 452 | } else { | ||
| 453 | LOG_ERROR(HW_GPU, "Invalid semaphore operation"); | ||
| 454 | } | ||
| 455 | } | ||
| 456 | } | ||
| 457 | } | 864 | } |
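The WriteLong branch above writes a 16-byte record (sequence word, zero padding, 64-bit timestamp) to the guest semaphore address, while the acquire variants compare the word at that address against semaphore_sequence. A sketch of the payload layout and the write; `WriteGuestMemory` is a hypothetical stand-in for the memory manager:

```cpp
#include <cstdint>
#include <cstring>

// Layout written by the WriteLong semaphore operation.
struct SemaphorePayload {
    std::uint32_t sequence;
    std::uint32_t zeros = 0;
    std::uint64_t timestamp;
};
static_assert(sizeof(SemaphorePayload) == 16);

// Hypothetical helper: serialize the payload into a guest-memory buffer.
inline void WriteGuestMemory(std::uint8_t* dst, std::uint32_t sequence,
                             std::uint64_t ticks) {
    SemaphorePayload payload{};
    payload.sequence = sequence;
    payload.timestamp = ticks; // the original code uses GetTicks() here
    std::memcpy(dst, &payload, sizeof(payload));
}
```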
| 458 | 865 | ||
| 459 | void GPU::ProcessSemaphoreRelease() { | 866 | bool GPU::UseNvdec() const { |
| 460 | memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release); | 867 | return impl->UseNvdec(); |
| 461 | } | 868 | } |
| 462 | 869 | ||
| 463 | void GPU::ProcessSemaphoreAcquire() { | 870 | void GPU::RendererFrameEndNotify() { |
| 464 | const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress()); | 871 | impl->RendererFrameEndNotify(); |
| 465 | const auto value = regs.semaphore_acquire; | ||
| 466 | if (word != value) { | ||
| 467 | regs.acquire_active = true; | ||
| 468 | regs.acquire_value = value; | ||
| 469 | // TODO(kemathe73) figure out how to do the acquire_timeout | ||
| 470 | regs.acquire_mode = false; | ||
| 471 | regs.acquire_source = false; | ||
| 472 | } | ||
| 473 | } | 872 | } |
| 474 | 873 | ||
| 475 | void GPU::Start() { | 874 | void GPU::Start() { |
| 476 | gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher); | 875 | impl->Start(); |
| 477 | cpu_context = renderer->GetRenderWindow().CreateSharedContext(); | ||
| 478 | cpu_context->MakeCurrent(); | ||
| 479 | } | 876 | } |
| 480 | 877 | ||
| 481 | void GPU::ObtainContext() { | 878 | void GPU::ObtainContext() { |
| 482 | cpu_context->MakeCurrent(); | 879 | impl->ObtainContext(); |
| 483 | } | 880 | } |
| 484 | 881 | ||
| 485 | void GPU::ReleaseContext() { | 882 | void GPU::ReleaseContext() { |
| 486 | cpu_context->DoneCurrent(); | 883 | impl->ReleaseContext(); |
| 487 | } | 884 | } |
| 488 | 885 | ||
| 489 | void GPU::PushGPUEntries(Tegra::CommandList&& entries) { | 886 | void GPU::PushGPUEntries(Tegra::CommandList&& entries) { |
| 490 | gpu_thread.SubmitList(std::move(entries)); | 887 | impl->PushGPUEntries(std::move(entries)); |
| 491 | } | 888 | } |
| 492 | 889 | ||
| 493 | void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { | 890 | void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { |
| 494 | if (!use_nvdec) { | 891 | impl->PushCommandBuffer(entries); |
| 495 | return; | ||
| 496 | } | ||
| 497 | |||
| 498 | if (!cdma_pusher) { | ||
| 499 | cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this); | ||
| 500 | } | ||
| 501 | |||
| 502 | // SubmitCommandBuffer would make the nvdec operations async; this is not currently working | ||
| 503 | // TODO(ameerj): RE proper async nvdec operation | ||
| 504 | // gpu_thread.SubmitCommandBuffer(std::move(entries)); | ||
| 505 | |||
| 506 | cdma_pusher->ProcessEntries(std::move(entries)); | ||
| 507 | } | 892 | } |
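PushCommandBuffer (left) only instantiates the CDmaPusher the first time an nvdec command buffer arrives, then processes entries synchronously on the caller's thread, since async submission is still a TODO in this change. The lazy-init shape, reduced and with stand-in types:

```cpp
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>

// Stand-ins for the real Tegra types; only the control flow matters here.
struct ChCommandHeader {
    std::uint32_t raw;
};
using ChCommandHeaderList = std::vector<ChCommandHeader>;

struct CDmaPusher {
    void ProcessEntries(ChCommandHeaderList&&) { /* decode entries here */ }
};

class NvdecFrontend {
public:
    explicit NvdecFrontend(bool use_nvdec_) : use_nvdec{use_nvdec_} {}

    void PushCommandBuffer(ChCommandHeaderList& entries) {
        if (!use_nvdec) {
            return; // titles that never touch nvdec pay no cost
        }
        if (!cdma_pusher) {
            cdma_pusher = std::make_unique<CDmaPusher>(); // created on first use
        }
        cdma_pusher->ProcessEntries(std::move(entries));
    }

private:
    const bool use_nvdec;
    std::unique_ptr<CDmaPusher> cdma_pusher;
};
```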
| 508 | 893 | ||
| 509 | void GPU::ClearCdmaInstance() { | 894 | void GPU::ClearCdmaInstance() { |
| 510 | cdma_pusher.reset(); | 895 | impl->ClearCdmaInstance(); |
| 511 | } | 896 | } |
| 512 | 897 | ||
| 513 | void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | 898 | void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
| 514 | gpu_thread.SwapBuffers(framebuffer); | 899 | impl->SwapBuffers(framebuffer); |
| 515 | } | 900 | } |
| 516 | 901 | ||
| 517 | void GPU::FlushRegion(VAddr addr, u64 size) { | 902 | void GPU::FlushRegion(VAddr addr, u64 size) { |
| 518 | gpu_thread.FlushRegion(addr, size); | 903 | impl->FlushRegion(addr, size); |
| 519 | } | 904 | } |
| 520 | 905 | ||
| 521 | void GPU::InvalidateRegion(VAddr addr, u64 size) { | 906 | void GPU::InvalidateRegion(VAddr addr, u64 size) { |
| 522 | gpu_thread.InvalidateRegion(addr, size); | 907 | impl->InvalidateRegion(addr, size); |
| 523 | } | 908 | } |
| 524 | 909 | ||
| 525 | void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 910 | void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 526 | gpu_thread.FlushAndInvalidateRegion(addr, size); | 911 | impl->FlushAndInvalidateRegion(addr, size); |
| 527 | } | ||
| 528 | |||
| 529 | void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const { | ||
| 530 | auto& interrupt_manager = system.InterruptManager(); | ||
| 531 | interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); | ||
| 532 | } | ||
| 533 | |||
| 534 | void GPU::OnCommandListEnd() { | ||
| 535 | if (is_async) { | ||
| 536 | // This command only applies to asynchronous GPU mode | ||
| 537 | gpu_thread.OnCommandListEnd(); | ||
| 538 | } | ||
| 539 | } | 912 | } |
| 540 | 913 | ||
| 541 | } // namespace Tegra | 914 | } // namespace Tegra |
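Taken together, the right-hand column of gpu.cpp is a textbook pimpl conversion: every public member now forwards to a private Impl, which is what lets the header in the next hunk shed nearly all of its includes and the CacheAddr helpers. The idiom reduced to one method, as a sketch:

```cpp
#include <cstdint>
#include <memory>

// gpu.h -- public surface only; Impl stays an incomplete type here.
class GPU {
public:
    GPU();
    ~GPU(); // must be defined out of line, where Impl is complete
    void WaitFence(std::uint32_t syncpoint_id, std::uint32_t value);

private:
    struct Impl;
    std::unique_ptr<Impl> impl;
};

// gpu.cpp -- the only translation unit that sees Impl's layout.
struct GPU::Impl {
    void WaitFence(std::uint32_t /*syncpoint_id*/, std::uint32_t /*value*/) {
        // ... synchronization state and logic live here ...
    }
};

GPU::GPU() : impl{std::make_unique<Impl>()} {}
GPU::~GPU() = default;

void GPU::WaitFence(std::uint32_t syncpoint_id, std::uint32_t value) {
    impl->WaitFence(syncpoint_id, value);
}
```

The out-of-line destructor is the load-bearing detail: std::unique_ptr<Impl> needs Impl to be complete at the point of destruction, so defaulting ~GPU() in the .cpp file is what makes the incomplete type legal in the header.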
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index e6a02a71b..05e5c94f3 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -4,28 +4,12 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 8 | #include <atomic> | ||
| 9 | #include <condition_variable> | ||
| 10 | #include <list> | ||
| 11 | #include <memory> | 7 | #include <memory> |
| 12 | #include <mutex> | 8 | |
| 9 | #include "common/bit_field.h" | ||
| 13 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 14 | #include "core/hle/service/nvdrv/nvdata.h" | ||
| 15 | #include "core/hle/service/nvflinger/buffer_queue.h" | ||
| 16 | #include "video_core/cdma_pusher.h" | 11 | #include "video_core/cdma_pusher.h" |
| 17 | #include "video_core/dma_pusher.h" | ||
| 18 | #include "video_core/framebuffer_config.h" | 12 | #include "video_core/framebuffer_config.h" |
| 19 | #include "video_core/gpu_thread.h" | ||
| 20 | |||
| 21 | using CacheAddr = std::uintptr_t; | ||
| 22 | [[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) { | ||
| 23 | return reinterpret_cast<CacheAddr>(host_ptr); | ||
| 24 | } | ||
| 25 | |||
| 26 | [[nodiscard]] inline u8* FromCacheAddr(CacheAddr cache_addr) { | ||
| 27 | return reinterpret_cast<u8*>(cache_addr); | ||
| 28 | } | ||
| 29 | 13 | ||
| 30 | namespace Core { | 14 | namespace Core { |
| 31 | namespace Frontend { | 15 | namespace Frontend { |
| @@ -40,6 +24,9 @@ class ShaderNotify; | |||
| 40 | } // namespace VideoCore | 24 | } // namespace VideoCore |
| 41 | 25 | ||
| 42 | namespace Tegra { | 26 | namespace Tegra { |
| 27 | class DmaPusher; | ||
| 28 | class CDmaPusher; | ||
| 29 | struct CommandList; | ||
| 43 | 30 | ||
| 44 | enum class RenderTargetFormat : u32 { | 31 | enum class RenderTargetFormat : u32 { |
| 45 | NONE = 0x0, | 32 | NONE = 0x0, |
| @@ -138,7 +125,18 @@ public: | |||
| 138 | } | 125 | } |
| 139 | }; | 126 | }; |
| 140 | 127 | ||
| 141 | explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_); | 128 | enum class FenceOperation : u32 { |
| 129 | Acquire = 0, | ||
| 130 | Increment = 1, | ||
| 131 | }; | ||
| 132 | |||
| 133 | union FenceAction { | ||
| 134 | u32 raw; | ||
| 135 | BitField<0, 1, FenceOperation> op; | ||
| 136 | BitField<8, 24, u32> syncpoint_id; | ||
| 137 | }; | ||
| 138 | |||
| 139 | explicit GPU(Core::System& system, bool is_async, bool use_nvdec); | ||
| 142 | ~GPU(); | 140 | ~GPU(); |
| 143 | 141 | ||
| 144 | /// Binds a renderer to the GPU. | 142 | /// Binds a renderer to the GPU. |
| @@ -162,9 +160,7 @@ public: | |||
| 162 | [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); | 160 | [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); |
| 163 | 161 | ||
| 164 | /// Obtains current flush request fence id. | 162 | /// Obtains current flush request fence id. |
| 165 | [[nodiscard]] u64 CurrentFlushRequestFence() const { | 163 | [[nodiscard]] u64 CurrentFlushRequestFence() const; |
| 166 | return current_flush_fence.load(std::memory_order_relaxed); | ||
| 167 | } | ||
| 168 | 164 | ||
| 169 | /// Tick pending requests within the GPU. | 165 | /// Tick pending requests within the GPU. |
| 170 | void TickWork(); | 166 | void TickWork(); |
| @@ -200,24 +196,16 @@ public: | |||
| 200 | [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const; | 196 | [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const; |
| 201 | 197 | ||
| 202 | /// Returns a reference to the underlying renderer. | 198 | /// Returns a reference to the underlying renderer. |
| 203 | [[nodiscard]] VideoCore::RendererBase& Renderer() { | 199 | [[nodiscard]] VideoCore::RendererBase& Renderer(); |
| 204 | return *renderer; | ||
| 205 | } | ||
| 206 | 200 | ||
| 207 | /// Returns a const reference to the underlying renderer. | 201 | /// Returns a const reference to the underlying renderer. |
| 208 | [[nodiscard]] const VideoCore::RendererBase& Renderer() const { | 202 | [[nodiscard]] const VideoCore::RendererBase& Renderer() const; |
| 209 | return *renderer; | ||
| 210 | } | ||
| 211 | 203 | ||
| 212 | /// Returns a reference to the shader notifier. | 204 | /// Returns a reference to the shader notifier. |
| 213 | [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() { | 205 | [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify(); |
| 214 | return *shader_notify; | ||
| 215 | } | ||
| 216 | 206 | ||
| 217 | /// Returns a const reference to the shader notifier. | 207 | /// Returns a const reference to the shader notifier. |
| 218 | [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const { | 208 | [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const; |
| 219 | return *shader_notify; | ||
| 220 | } | ||
| 221 | 209 | ||
| 222 | /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. | 210 | /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. |
| 223 | void WaitFence(u32 syncpoint_id, u32 value); | 211 | void WaitFence(u32 syncpoint_id, u32 value); |
| @@ -232,80 +220,12 @@ public: | |||
| 232 | 220 | ||
| 233 | [[nodiscard]] u64 GetTicks() const; | 221 | [[nodiscard]] u64 GetTicks() const; |
| 234 | 222 | ||
| 235 | [[nodiscard]] std::unique_lock<std::mutex> LockSync() { | 223 | [[nodiscard]] bool IsAsync() const; |
| 236 | return std::unique_lock{sync_mutex}; | ||
| 237 | } | ||
| 238 | |||
| 239 | [[nodiscard]] bool IsAsync() const { | ||
| 240 | return is_async; | ||
| 241 | } | ||
| 242 | 224 | ||
| 243 | [[nodiscard]] bool UseNvdec() const { | 225 | [[nodiscard]] bool UseNvdec() const; |
| 244 | return use_nvdec; | ||
| 245 | } | ||
| 246 | 226 | ||
| 247 | void RendererFrameEndNotify(); | 227 | void RendererFrameEndNotify(); |
| 248 | 228 | ||
| 249 | enum class FenceOperation : u32 { | ||
| 250 | Acquire = 0, | ||
| 251 | Increment = 1, | ||
| 252 | }; | ||
| 253 | |||
| 254 | union FenceAction { | ||
| 255 | u32 raw; | ||
| 256 | BitField<0, 1, FenceOperation> op; | ||
| 257 | BitField<8, 24, u32> syncpoint_id; | ||
| 258 | |||
| 259 | [[nodiscard]] static CommandHeader Build(FenceOperation op, u32 syncpoint_id) { | ||
| 260 | FenceAction result{}; | ||
| 261 | result.op.Assign(op); | ||
| 262 | result.syncpoint_id.Assign(syncpoint_id); | ||
| 263 | return {result.raw}; | ||
| 264 | } | ||
| 265 | }; | ||
| 266 | |||
| 267 | struct Regs { | ||
| 268 | static constexpr size_t NUM_REGS = 0x40; | ||
| 269 | |||
| 270 | union { | ||
| 271 | struct { | ||
| 272 | INSERT_PADDING_WORDS_NOINIT(0x4); | ||
| 273 | struct { | ||
| 274 | u32 address_high; | ||
| 275 | u32 address_low; | ||
| 276 | |||
| 277 | [[nodiscard]] GPUVAddr SemaphoreAddress() const { | ||
| 278 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 279 | address_low); | ||
| 280 | } | ||
| 281 | } semaphore_address; | ||
| 282 | |||
| 283 | u32 semaphore_sequence; | ||
| 284 | u32 semaphore_trigger; | ||
| 285 | INSERT_PADDING_WORDS_NOINIT(0xC); | ||
| 286 | |||
| 287 | // The pusher and the puller share the reference counter, the pusher only has read | ||
| 288 | // access | ||
| 289 | u32 reference_count; | ||
| 290 | INSERT_PADDING_WORDS_NOINIT(0x5); | ||
| 291 | |||
| 292 | u32 semaphore_acquire; | ||
| 293 | u32 semaphore_release; | ||
| 294 | u32 fence_value; | ||
| 295 | FenceAction fence_action; | ||
| 296 | INSERT_PADDING_WORDS_NOINIT(0xE2); | ||
| 297 | |||
| 298 | // Puller state | ||
| 299 | u32 acquire_mode; | ||
| 300 | u32 acquire_source; | ||
| 301 | u32 acquire_active; | ||
| 302 | u32 acquire_timeout; | ||
| 303 | u32 acquire_value; | ||
| 304 | }; | ||
| 305 | std::array<u32, NUM_REGS> reg_array; | ||
| 306 | }; | ||
| 307 | } regs{}; | ||
| 308 | |||
| 309 | /// Performs any additional setup necessary in order to begin GPU emulation. | 229 | /// Performs any additional setup necessary in order to begin GPU emulation. |
| 310 | /// This can be used to launch any necessary threads and register any necessary | 230 | /// This can be used to launch any necessary threads and register any necessary |
| 311 | /// core timing events. | 231 | /// core timing events. |
| @@ -338,104 +258,9 @@ public: | |||
| 338 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 258 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 339 | void FlushAndInvalidateRegion(VAddr addr, u64 size); | 259 | void FlushAndInvalidateRegion(VAddr addr, u64 size); |
| 340 | 260 | ||
| 341 | protected: | ||
| 342 | void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const; | ||
| 343 | |||
| 344 | private: | 261 | private: |
| 345 | void ProcessBindMethod(const MethodCall& method_call); | 262 | struct Impl; |
| 346 | void ProcessFenceActionMethod(); | 263 | std::unique_ptr<Impl> impl; |
| 347 | void ProcessWaitForInterruptMethod(); | ||
| 348 | void ProcessSemaphoreTriggerMethod(); | ||
| 349 | void ProcessSemaphoreRelease(); | ||
| 350 | void ProcessSemaphoreAcquire(); | ||
| 351 | |||
| 352 | /// Calls a GPU puller method. | ||
| 353 | void CallPullerMethod(const MethodCall& method_call); | ||
| 354 | |||
| 355 | /// Calls a GPU engine method. | ||
| 356 | void CallEngineMethod(const MethodCall& method_call); | ||
| 357 | |||
| 358 | /// Calls a GPU engine multivalue method. | ||
| 359 | void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, | ||
| 360 | u32 methods_pending); | ||
| 361 | |||
| 362 | /// Determines where the method should be executed. | ||
| 363 | [[nodiscard]] bool ExecuteMethodOnEngine(u32 method); | ||
| 364 | |||
| 365 | protected: | ||
| 366 | Core::System& system; | ||
| 367 | std::unique_ptr<Tegra::MemoryManager> memory_manager; | ||
| 368 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; | ||
| 369 | std::unique_ptr<Tegra::CDmaPusher> cdma_pusher; | ||
| 370 | std::unique_ptr<VideoCore::RendererBase> renderer; | ||
| 371 | VideoCore::RasterizerInterface* rasterizer = nullptr; | ||
| 372 | const bool use_nvdec; | ||
| 373 | |||
| 374 | private: | ||
| 375 | /// Mapping of command subchannels to their bound engine ids | ||
| 376 | std::array<EngineID, 8> bound_engines = {}; | ||
| 377 | /// 3D engine | ||
| 378 | std::unique_ptr<Engines::Maxwell3D> maxwell_3d; | ||
| 379 | /// 2D engine | ||
| 380 | std::unique_ptr<Engines::Fermi2D> fermi_2d; | ||
| 381 | /// Compute engine | ||
| 382 | std::unique_ptr<Engines::KeplerCompute> kepler_compute; | ||
| 383 | /// DMA engine | ||
| 384 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; | ||
| 385 | /// Inline memory engine | ||
| 386 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; | ||
| 387 | /// Shader build notifier | ||
| 388 | std::unique_ptr<VideoCore::ShaderNotify> shader_notify; | ||
| 389 | /// When true, we are about to shut down emulation session, so terminate outstanding tasks | ||
| 390 | std::atomic_bool shutting_down{}; | ||
| 391 | |||
| 392 | std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{}; | ||
| 393 | |||
| 394 | std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts; | ||
| 395 | |||
| 396 | std::mutex sync_mutex; | ||
| 397 | std::mutex device_mutex; | ||
| 398 | |||
| 399 | std::condition_variable sync_cv; | ||
| 400 | |||
| 401 | struct FlushRequest { | ||
| 402 | explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_) | ||
| 403 | : fence{fence_}, addr{addr_}, size{size_} {} | ||
| 404 | u64 fence; | ||
| 405 | VAddr addr; | ||
| 406 | std::size_t size; | ||
| 407 | }; | ||
| 408 | |||
| 409 | std::list<FlushRequest> flush_requests; | ||
| 410 | std::atomic<u64> current_flush_fence{}; | ||
| 411 | u64 last_flush_fence{}; | ||
| 412 | std::mutex flush_request_mutex; | ||
| 413 | |||
| 414 | const bool is_async; | ||
| 415 | |||
| 416 | VideoCommon::GPUThread::ThreadManager gpu_thread; | ||
| 417 | std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context; | ||
| 418 | }; | 264 | }; |
| 419 | 265 | ||
| 420 | #define ASSERT_REG_POSITION(field_name, position) \ | ||
| 421 | static_assert(offsetof(GPU::Regs, field_name) == position * 4, \ | ||
| 422 | "Field " #field_name " has invalid position") | ||
| 423 | |||
| 424 | ASSERT_REG_POSITION(semaphore_address, 0x4); | ||
| 425 | ASSERT_REG_POSITION(semaphore_sequence, 0x6); | ||
| 426 | ASSERT_REG_POSITION(semaphore_trigger, 0x7); | ||
| 427 | ASSERT_REG_POSITION(reference_count, 0x14); | ||
| 428 | ASSERT_REG_POSITION(semaphore_acquire, 0x1A); | ||
| 429 | ASSERT_REG_POSITION(semaphore_release, 0x1B); | ||
| 430 | ASSERT_REG_POSITION(fence_value, 0x1C); | ||
| 431 | ASSERT_REG_POSITION(fence_action, 0x1D); | ||
| 432 | |||
| 433 | ASSERT_REG_POSITION(acquire_mode, 0x100); | ||
| 434 | ASSERT_REG_POSITION(acquire_source, 0x101); | ||
| 435 | ASSERT_REG_POSITION(acquire_active, 0x102); | ||
| 436 | ASSERT_REG_POSITION(acquire_timeout, 0x103); | ||
| 437 | ASSERT_REG_POSITION(acquire_value, 0x104); | ||
| 438 | |||
| 439 | #undef ASSERT_REG_POSITION | ||
| 440 | |||
| 441 | } // namespace Tegra | 266 | } // namespace Tegra |
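The removed Regs block and its ASSERT_REG_POSITION checks pinned every named register to a fixed word offset in the puller's 0x40-word register file (presumably they now live inside the new Impl rather than disappearing). The same compile-time guarantee can be restated standalone with offsetof:

```cpp
#include <cstddef>
#include <cstdint>

struct PullerRegs {
    std::uint32_t padding0[0x4];
    std::uint32_t semaphore_address_high; // word 0x4
    std::uint32_t semaphore_address_low;  // word 0x5
    std::uint32_t semaphore_sequence;     // word 0x6
    std::uint32_t semaphore_trigger;      // word 0x7
};

// Each named register must sit at (word offset) * 4 bytes.
static_assert(offsetof(PullerRegs, semaphore_sequence) == 0x6 * 4);
static_assert(offsetof(PullerRegs, semaphore_trigger) == 0x7 * 4);
```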
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 91bada925..00984188e 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -130,9 +130,6 @@ public: | |||
| 130 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 130 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 131 | void FlushAndInvalidateRegion(VAddr addr, u64 size); | 131 | void FlushAndInvalidateRegion(VAddr addr, u64 size); |
| 132 | 132 | ||
| 133 | // Stops the GPU execution and waits for the GPU to finish working | ||
| 134 | void ShutDown(); | ||
| 135 | |||
| 136 | void OnCommandListEnd(); | 133 | void OnCommandListEnd(); |
| 137 | 134 | ||
| 138 | private: | 135 | private: |
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index aac851253..73231061a 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <array> | 8 | #include <array> |
| 9 | #include <cstring> | 9 | #include <cstring> |
| 10 | #include <iterator> | 10 | #include <iterator> |
| 11 | #include <list> | ||
| 11 | #include <memory> | 12 | #include <memory> |
| 12 | #include <mutex> | 13 | #include <mutex> |
| 13 | #include <optional> | 14 | #include <optional> |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 54dae2c41..8c3ca3d82 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "video_core/surface.h" | 20 | #include "video_core/surface.h" |
| 21 | #include "video_core/texture_cache/formatter.h" | 21 | #include "video_core/texture_cache/formatter.h" |
| 22 | #include "video_core/texture_cache/samples_helper.h" | 22 | #include "video_core/texture_cache/samples_helper.h" |
| 23 | #include "video_core/texture_cache/util.h" | ||
| 23 | 24 | ||
| 24 | namespace OpenGL { | 25 | namespace OpenGL { |
| 25 | namespace { | 26 | namespace { |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index c498a8a8f..1ca2c90be 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "shader_recompiler/shader_info.h" | 12 | #include "shader_recompiler/shader_info.h" |
| 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 14 | #include "video_core/renderer_opengl/util_shaders.h" | 14 | #include "video_core/renderer_opengl/util_shaders.h" |
| 15 | #include "video_core/texture_cache/image_view_base.h" | ||
| 15 | #include "video_core/texture_cache/texture_cache_base.h" | 16 | #include "video_core/texture_cache/texture_cache_base.h" |
| 16 | 17 | ||
| 17 | namespace OpenGL { | 18 | namespace OpenGL { |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 3b87640b5..06c5fb867 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 21 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 22 | #include "video_core/texture_cache/formatter.h" | 22 | #include "video_core/texture_cache/formatter.h" |
| 23 | #include "video_core/texture_cache/samples_helper.h" | 23 | #include "video_core/texture_cache/samples_helper.h" |
| 24 | #include "video_core/texture_cache/util.h" | ||
| 24 | #include "video_core/vulkan_common/vulkan_device.h" | 25 | #include "video_core/vulkan_common/vulkan_device.h" |
| 25 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 26 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 26 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 27 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 6d5a68bfe..b09c468e4 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -4,11 +4,11 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <compare> | ||
| 8 | #include <span> | 7 | #include <span> |
| 9 | 8 | ||
| 10 | #include "shader_recompiler/shader_info.h" | 9 | #include "shader_recompiler/shader_info.h" |
| 11 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 10 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 11 | #include "video_core/texture_cache/image_view_base.h" | ||
| 12 | #include "video_core/texture_cache/texture_cache_base.h" | 12 | #include "video_core/texture_cache/texture_cache_base.h" |
| 13 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 13 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 81a878bb2..05850afd0 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include "common/fs/fs.h" | 16 | #include "common/fs/fs.h" |
| 17 | #include "common/logging/log.h" | 17 | #include "common/logging/log.h" |
| 18 | #include "shader_recompiler/environment.h" | 18 | #include "shader_recompiler/environment.h" |
| 19 | #include "video_core/engines/kepler_compute.h" | ||
| 19 | #include "video_core/memory_manager.h" | 20 | #include "video_core/memory_manager.h" |
| 20 | #include "video_core/shader_environment.h" | 21 | #include "video_core/shader_environment.h" |
| 21 | #include "video_core/textures/texture.h" | 22 | #include "video_core/textures/texture.h" |
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index 2079979db..6640e53d0 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h | |||
| @@ -5,13 +5,13 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <atomic> | ||
| 9 | #include <filesystem> | 8 | #include <filesystem> |
| 10 | #include <iosfwd> | 9 | #include <iosfwd> |
| 11 | #include <limits> | 10 | #include <limits> |
| 12 | #include <memory> | 11 | #include <memory> |
| 13 | #include <optional> | 12 | #include <optional> |
| 14 | #include <span> | 13 | #include <span> |
| 14 | #include <stop_token> | ||
| 15 | #include <type_traits> | 15 | #include <type_traits> |
| 16 | #include <unordered_map> | 16 | #include <unordered_map> |
| 17 | #include <vector> | 17 | #include <vector> |
| @@ -19,9 +19,7 @@ | |||
| 19 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| 20 | #include "common/unique_function.h" | 20 | #include "common/unique_function.h" |
| 21 | #include "shader_recompiler/environment.h" | 21 | #include "shader_recompiler/environment.h" |
| 22 | #include "video_core/engines/kepler_compute.h" | ||
| 23 | #include "video_core/engines/maxwell_3d.h" | 22 | #include "video_core/engines/maxwell_3d.h" |
| 24 | #include "video_core/textures/texture.h" | ||
| 25 | 23 | ||
| 26 | namespace Tegra { | 24 | namespace Tegra { |
| 27 | class MemoryManager; | 25 | class MemoryManager; |
diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp index 6527e14c8..e751f26c7 100644 --- a/src/video_core/texture_cache/image_view_info.cpp +++ b/src/video_core/texture_cache/image_view_info.cpp | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include "video_core/texture_cache/image_view_info.h" | 8 | #include "video_core/texture_cache/image_view_info.h" |
| 9 | #include "video_core/texture_cache/texture_cache_base.h" | 9 | #include "video_core/texture_cache/texture_cache_base.h" |
| 10 | #include "video_core/texture_cache/types.h" | 10 | #include "video_core/texture_cache/types.h" |
| 11 | #include "video_core/texture_cache/util.h" | ||
| 11 | #include "video_core/textures/texture.h" | 12 | #include "video_core/textures/texture.h" |
| 12 | 13 | ||
| 13 | namespace VideoCommon { | 14 | namespace VideoCommon { |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 24b809242..329df2e49 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -4,10 +4,15 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <unordered_set> | ||
| 8 | |||
| 7 | #include "common/alignment.h" | 9 | #include "common/alignment.h" |
| 8 | #include "video_core/dirty_flags.h" | 10 | #include "video_core/dirty_flags.h" |
| 11 | #include "video_core/engines/kepler_compute.h" | ||
| 12 | #include "video_core/texture_cache/image_view_base.h" | ||
| 9 | #include "video_core/texture_cache/samples_helper.h" | 13 | #include "video_core/texture_cache/samples_helper.h" |
| 10 | #include "video_core/texture_cache/texture_cache_base.h" | 14 | #include "video_core/texture_cache/texture_cache_base.h" |
| 15 | #include "video_core/texture_cache/util.h" | ||
| 11 | 16 | ||
| 12 | namespace VideoCommon { | 17 | namespace VideoCommon { |
| 13 | 18 | ||
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index d7528ed24..2d1893c1c 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -4,13 +4,12 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 8 | #include <mutex> | 7 | #include <mutex> |
| 9 | #include <span> | 8 | #include <span> |
| 10 | #include <type_traits> | 9 | #include <type_traits> |
| 11 | #include <unordered_map> | 10 | #include <unordered_map> |
| 12 | #include <unordered_set> | ||
| 13 | #include <vector> | 11 | #include <vector> |
| 12 | #include <queue> | ||
| 14 | 13 | ||
| 15 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 16 | #include "common/literals.h" | 15 | #include "common/literals.h" |
| @@ -18,10 +17,6 @@ | |||
| 18 | #include "video_core/compatible_formats.h" | 17 | #include "video_core/compatible_formats.h" |
| 19 | #include "video_core/delayed_destruction_ring.h" | 18 | #include "video_core/delayed_destruction_ring.h" |
| 20 | #include "video_core/engines/fermi_2d.h" | 19 | #include "video_core/engines/fermi_2d.h" |
| 21 | #include "video_core/engines/kepler_compute.h" | ||
| 22 | #include "video_core/engines/maxwell_3d.h" | ||
| 23 | #include "video_core/memory_manager.h" | ||
| 24 | #include "video_core/rasterizer_interface.h" | ||
| 25 | #include "video_core/surface.h" | 20 | #include "video_core/surface.h" |
| 26 | #include "video_core/texture_cache/descriptor_table.h" | 21 | #include "video_core/texture_cache/descriptor_table.h" |
| 27 | #include "video_core/texture_cache/image_base.h" | 22 | #include "video_core/texture_cache/image_base.h" |
| @@ -30,7 +25,6 @@ | |||
| 30 | #include "video_core/texture_cache/render_targets.h" | 25 | #include "video_core/texture_cache/render_targets.h" |
| 31 | #include "video_core/texture_cache/slot_vector.h" | 26 | #include "video_core/texture_cache/slot_vector.h" |
| 32 | #include "video_core/texture_cache/types.h" | 27 | #include "video_core/texture_cache/types.h" |
| 33 | #include "video_core/texture_cache/util.h" | ||
| 34 | #include "video_core/textures/texture.h" | 28 | #include "video_core/textures/texture.h" |
| 35 | 29 | ||
| 36 | namespace VideoCommon { | 30 | namespace VideoCommon { |