author    2019-02-18 20:58:32 -0500
committer 2019-03-14 22:34:42 -0400
commit    2eaf6c41a4686028c0abc84d1be6fd48a67cf49f
tree      6ad0848c848aea68e637386cad5068e13c831b92 /src/video_core/gpu_thread.h
parent    Merge pull request #2233 from ReinUsesLisp/morton-cleanup
gpu: Use host address for caching instead of guest address.
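This changes the key used by the rasterizer caches from the guest virtual address (VAddr) to the host address of the backing memory (CacheAddr, defined in video_core/gpu.h, which this diff newly includes). A minimal sketch of the idea, with simplified stand-ins for yuzu's actual definitions:

```cpp
#include <cstdint>

// Simplified stand-ins: yuzu's real definitions live in video_core/gpu.h
// and the emulated memory subsystem.
using VAddr = std::uint64_t;       // guest (emulated) virtual address
using CacheAddr = std::uintptr_t;  // host pointer value, used as the cache key

inline CacheAddr ToCacheAddr(const void* host_ptr) {
    return reinterpret_cast<CacheAddr>(host_ptr);
}

// Two guest mappings that alias the same host memory now hit the same
// cache entry, and cache lookups skip a guest-to-host translation.
```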
Diffstat (limited to 'src/video_core/gpu_thread.h')
-rw-r--r--   src/video_core/gpu_thread.h   132
1 file changed, 92 insertions(+), 40 deletions(-)
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index edb148b14..8cd7db1c6 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -13,6 +13,9 @@
 #include <thread>
 #include <variant>
 
+#include "common/threadsafe_queue.h"
+#include "video_core/gpu.h"
+
 namespace Tegra {
 struct FramebufferConfig;
 class DmaPusher;
@@ -24,6 +27,9 @@ class RendererBase;
 
 namespace VideoCommon::GPUThread {
 
+/// Command to signal to the GPU thread that processing has ended
+struct EndProcessingCommand final {};
+
 /// Command to signal to the GPU thread that a command list is ready for processing
 struct SubmitListCommand final {
     explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
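EndProcessingCommand is a sentinel that travels through the same queue as real work, so shutdown is ordered after any commands submitted before it. A hypothetical sketch of the consumer-side check (the real loop lives in gpu_thread.cpp, which is not part of this diff, and uses the SynchState defined further down):

```cpp
// Hypothetical sketch of the GPU-thread loop's shutdown handling.
CommandDataContainer next;
while (state.queue.Pop(next)) {
    if (std::holds_alternative<EndProcessingCommand>(next.data)) {
        return; // Draining stops here; the GPU thread exits cleanly
    }
    // ... std::visit the remaining command types ...
}
```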
@@ -36,59 +42,110 @@ struct SwapBuffersCommand final {
     explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
         : framebuffer{std::move(framebuffer)} {}
 
-    std::optional<const Tegra::FramebufferConfig> framebuffer;
+    std::optional<Tegra::FramebufferConfig> framebuffer;
 };
 
 /// Command to signal to the GPU thread to flush a region
 struct FlushRegionCommand final {
-    explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+    explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
 
-    const VAddr addr;
-    const u64 size;
+    CacheAddr addr;
+    u64 size;
 };
 
 /// Command to signal to the GPU thread to invalidate a region
 struct InvalidateRegionCommand final {
-    explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+    explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
 
-    const VAddr addr;
-    const u64 size;
+    CacheAddr addr;
+    u64 size;
 };
 
 /// Command to signal to the GPU thread to flush and invalidate a region
 struct FlushAndInvalidateRegionCommand final {
-    explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
+    explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
         : addr{addr}, size{size} {}
 
-    const VAddr addr;
-    const u64 size;
+    CacheAddr addr;
+    u64 size;
 };
 
-using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
-                                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+using CommandData =
+    std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
+                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+
+struct CommandDataContainer {
+    CommandDataContainer() = default;
+
+    CommandDataContainer(CommandData&& data) : data{std::move(data)} {}
+
+    CommandDataContainer& operator=(const CommandDataContainer& t) {
+        data = std::move(t.data);
+        return *this;
+    }
+
+    CommandData data;
+};
 
 /// Struct used to synchronize the GPU thread
 struct SynchState final {
-    std::atomic<bool> is_running{true};
-    std::atomic<bool> is_idle{true};
-    std::condition_variable signal_condition;
-    std::mutex signal_mutex;
-    std::condition_variable idle_condition;
-    std::mutex idle_mutex;
+    std::atomic_bool is_running{true};
+    std::atomic_int queued_frame_count{};
+    std::mutex frames_mutex;
+    std::mutex commands_mutex;
+    std::condition_variable commands_condition;
+    std::condition_variable frames_condition;
 
-    // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
-    // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
-    // empty. This allows for efficient thread-safe access, as it does not require any copies.
-
-    using CommandQueue = std::queue<CommandData>;
-    std::array<CommandQueue, 2> command_queues;
-    CommandQueue* push_queue{&command_queues[0]};
-    CommandQueue* pop_queue{&command_queues[1]};
-
-    void UpdateIdleState() {
-        std::lock_guard<std::mutex> lock{idle_mutex};
-        is_idle = command_queues[0].empty() && command_queues[1].empty();
-    }
+    void IncrementFramesCounter() {
+        std::lock_guard<std::mutex> lock{frames_mutex};
+        ++queued_frame_count;
+    }
+
+    void DecrementFramesCounter() {
+        {
+            std::lock_guard<std::mutex> lock{frames_mutex};
+            --queued_frame_count;
+
+            if (queued_frame_count) {
+                return;
+            }
+        }
+        frames_condition.notify_one();
+    }
+
+    void WaitForFrames() {
+        {
+            std::lock_guard<std::mutex> lock{frames_mutex};
+            if (!queued_frame_count) {
+                return;
+            }
+        }
+
+        // Wait for the GPU to be idle (all commands to be executed)
+        {
+            std::unique_lock<std::mutex> lock{frames_mutex};
+            frames_condition.wait(lock, [this] { return !queued_frame_count; });
+        }
+    }
+
+    void SignalCommands() {
+        {
+            std::unique_lock<std::mutex> lock{commands_mutex};
+            if (queue.Empty()) {
+                return;
+            }
+        }
+
+        commands_condition.notify_one();
+    }
+
+    void WaitForCommands() {
+        std::unique_lock<std::mutex> lock{commands_mutex};
+        commands_condition.wait(lock, [this] { return !queue.Empty(); });
+    }
+
+    using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
+    CommandQueue queue;
 };
 
 /// Class used to manage the GPU thread
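The rewritten SynchState drops the double-buffered std::queue pair in favor of a single-producer single-consumer Common::SPSCQueue and tracks in-flight frames explicitly. A hedged sketch of how the two sides might pair up, using only the method names from this diff (the actual call sites are in gpu_thread.cpp, which this diff does not show):

```cpp
// CPU (producer) side, e.g. when a swap is requested:
state.IncrementFramesCounter();  // one more frame in flight
state.queue.Push(CommandDataContainer{SwapBuffersCommand{framebuffer}});
state.SignalCommands();          // wake the GPU thread if it is waiting

// When the CPU must synchronize with presentation:
state.WaitForFrames();           // blocks until queued_frame_count reaches 0

// GPU (consumer) side, after executing a swap:
state.DecrementFramesCounter();  // notifies frames_condition at zero
```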
@@ -105,22 +162,17 @@ public:
         std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    void FlushRegion(VAddr addr, u64 size);
+    void FlushRegion(CacheAddr addr, u64 size);
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
-    void InvalidateRegion(VAddr addr, u64 size);
+    void InvalidateRegion(CacheAddr addr, u64 size);
 
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    void FlushAndInvalidateRegion(VAddr addr, u64 size);
+    void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
 
 private:
     /// Pushes a command to be executed by the GPU thread
-    void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);
-
-    /// Returns true if this is called by the GPU thread
-    bool IsGpuThread() const {
-        return std::this_thread::get_id() == thread_id;
-    }
+    void PushCommand(CommandData&& command_data);
 
 private:
     SynchState state;
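With the SPSC queue, PushCommand no longer needs the wait_for_idle/allow_on_cpu flags or the IsGpuThread() check: every command is simply queued and the consumer is signalled. A plausible sketch of the new body, assuming the implementation in gpu_thread.cpp (not shown in this diff) matches the new signature:

```cpp
void ThreadManager::PushCommand(CommandData&& command_data) {
    // Queue the command for the GPU thread and wake it if it is waiting.
    state.queue.Push(CommandDataContainer{std::move(command_data)});
    state.SignalCommands();
}
```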