diff options
| author | 2018-11-27 19:17:33 -0500 | |
|---|---|---|
| committer | 2018-11-27 19:17:33 -0500 | |
| commit | ac74b71d7530452126792c5fa0bf01fe7378ba00 (patch) | |
| tree | 7db6044f15ded8659aff9fd822d41139c495e171 /src/video_core | |
| parent | gpu: Move command list profiling to DmaPusher::DispatchCalls. (diff) | |
| download | yuzu-ac74b71d7530452126792c5fa0bf01fe7378ba00.tar.gz yuzu-ac74b71d7530452126792c5fa0bf01fe7378ba00.tar.xz yuzu-ac74b71d7530452126792c5fa0bf01fe7378ba00.zip | |
dma_pushbuffer: Optimize to avoid loop and copy on Push.
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/dma_pusher.cpp | 12 | ||||
| -rw-r--r-- | src/video_core/dma_pusher.h | 10 |
2 files changed, 17 insertions, 5 deletions
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 23ec97944..63a958f11 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -23,6 +23,8 @@ void DmaPusher::DispatchCalls() { | |||
| 23 | // On entering GPU code, assume all memory may be touched by the ARM core. | 23 | // On entering GPU code, assume all memory may be touched by the ARM core. |
| 24 | gpu.Maxwell3D().dirty_flags.OnMemoryWrite(); | 24 | gpu.Maxwell3D().dirty_flags.OnMemoryWrite(); |
| 25 | 25 | ||
| 26 | dma_pushbuffer_subindex = 0; | ||
| 27 | |||
| 26 | while (Core::System::GetInstance().IsPoweredOn()) { | 28 | while (Core::System::GetInstance().IsPoweredOn()) { |
| 27 | if (!Step()) { | 29 | if (!Step()) { |
| 28 | break; | 30 | break; |
| @@ -89,11 +91,17 @@ bool DmaPusher::Step() { | |||
| 89 | } | 91 | } |
| 90 | } else if (ib_enable && !dma_pushbuffer.empty()) { | 92 | } else if (ib_enable && !dma_pushbuffer.empty()) { |
| 91 | // Current pushbuffer empty, but we have more IB entries to read | 93 | // Current pushbuffer empty, but we have more IB entries to read |
| 92 | const CommandListHeader& command_list_header{dma_pushbuffer.front()}; | 94 | const CommandList& command_list{dma_pushbuffer.front()}; |
| 95 | const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]}; | ||
| 93 | dma_get = command_list_header.addr; | 96 | dma_get = command_list_header.addr; |
| 94 | dma_put = dma_get + command_list_header.size * sizeof(u32); | 97 | dma_put = dma_get + command_list_header.size * sizeof(u32); |
| 95 | non_main = command_list_header.is_non_main; | 98 | non_main = command_list_header.is_non_main; |
| 96 | dma_pushbuffer.pop(); | 99 | |
| 100 | if (dma_pushbuffer_subindex >= command_list.size()) { | ||
| 101 | // We've gone through the current list, remove it from the queue | ||
| 102 | dma_pushbuffer.pop(); | ||
| 103 | dma_pushbuffer_subindex = 0; | ||
| 104 | } | ||
| 97 | } else { | 105 | } else { |
| 98 | // Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do | 106 | // Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do |
| 99 | return {}; | 107 | return {}; |
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 39d98e46e..16e0697c4 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <vector> | ||
| 7 | #include <queue> | 8 | #include <queue> |
| 8 | 9 | ||
| 9 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| @@ -45,6 +46,8 @@ static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect | |||
| 45 | 46 | ||
| 46 | class GPU; | 47 | class GPU; |
| 47 | 48 | ||
| 49 | using CommandList = std::vector<Tegra::CommandListHeader>; | ||
| 50 | |||
| 48 | /** | 51 | /** |
| 49 | * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the | 52 | * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the |
| 50 | * emulated app fills with commands and tells PFIFO to process. The pushbuffers are then assembled | 53 | * emulated app fills with commands and tells PFIFO to process. The pushbuffers are then assembled |
| @@ -57,8 +60,8 @@ public: | |||
| 57 | explicit DmaPusher(GPU& gpu); | 60 | explicit DmaPusher(GPU& gpu); |
| 58 | ~DmaPusher(); | 61 | ~DmaPusher(); |
| 59 | 62 | ||
| 60 | void Push(const CommandListHeader& command_list_header) { | 63 | void Push(CommandList&& entries) { |
| 61 | dma_pushbuffer.push(command_list_header); | 64 | dma_pushbuffer.push(std::move(entries)); |
| 62 | } | 65 | } |
| 63 | 66 | ||
| 64 | void DispatchCalls(); | 67 | void DispatchCalls(); |
| @@ -72,7 +75,8 @@ private: | |||
| 72 | 75 | ||
| 73 | GPU& gpu; | 76 | GPU& gpu; |
| 74 | 77 | ||
| 75 | std::queue<CommandListHeader> dma_pushbuffer; | 78 | std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed |
| 79 | std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer | ||
| 76 | 80 | ||
| 77 | struct DmaState { | 81 | struct DmaState { |
| 78 | u32 method; ///< Current method | 82 | u32 method; ///< Current method |