diff options
| author | 2018-09-06 15:48:08 +0200 | |
|---|---|---|
| committer | 2018-09-10 22:06:13 +0200 | |
| commit | 0cfb0bacb2581d79631f496afbc3a3d5dd19eb42 (patch) | |
| tree | c6fdc90795dc4e1851e2b3e3bd792e48f19251ba | |
| parent | rasterizer: Drop unused handler. (diff) | |
| download | yuzu-0cfb0bacb2581d79631f496afbc3a3d5dd19eb42.tar.gz yuzu-0cfb0bacb2581d79631f496afbc3a3d5dd19eb42.tar.xz yuzu-0cfb0bacb2581d79631f496afbc3a3d5dd19eb42.zip | |
video_core: Move command buffer loop.
This moves the hot loop into video_core. This refactoring shall reduce the CPU overhead of calling ProcessCommandList.
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | 26 | ||||
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_gpu.h | 17 | ||||
| -rw-r--r-- | src/video_core/command_processor.cpp | 97 | ||||
| -rw-r--r-- | src/video_core/command_processor.h | 17 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 4 |
5 files changed, 84 insertions, 77 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 4cdf7f613..8e0f9a9e5 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include "core/core.h" | 8 | #include "core/core.h" |
| 9 | #include "core/hle/service/nvdrv/devices/nvhost_gpu.h" | 9 | #include "core/hle/service/nvdrv/devices/nvhost_gpu.h" |
| 10 | #include "core/memory.h" | 10 | #include "core/memory.h" |
| 11 | #include "video_core/command_processor.h" | ||
| 11 | #include "video_core/gpu.h" | 12 | #include "video_core/gpu.h" |
| 12 | #include "video_core/memory_manager.h" | 13 | #include "video_core/memory_manager.h" |
| 13 | 14 | ||
| @@ -134,17 +135,16 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp | |||
| 134 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", | 135 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", |
| 135 | params.address, params.num_entries, params.flags); | 136 | params.address, params.num_entries, params.flags); |
| 136 | 137 | ||
| 137 | ASSERT_MSG(input.size() == | 138 | ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) + |
| 138 | sizeof(IoctlSubmitGpfifo) + params.num_entries * sizeof(IoctlGpfifoEntry), | 139 | params.num_entries * sizeof(Tegra::CommandListHeader), |
| 139 | "Incorrect input size"); | 140 | "Incorrect input size"); |
| 140 | 141 | ||
| 141 | std::vector<IoctlGpfifoEntry> entries(params.num_entries); | 142 | std::vector<Tegra::CommandListHeader> entries(params.num_entries); |
| 142 | std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], | 143 | std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], |
| 143 | params.num_entries * sizeof(IoctlGpfifoEntry)); | 144 | params.num_entries * sizeof(Tegra::CommandListHeader)); |
| 144 | for (auto entry : entries) { | 145 | |
| 145 | Tegra::GPUVAddr va_addr = entry.Address(); | 146 | Core::System::GetInstance().GPU().ProcessCommandLists(entries); |
| 146 | Core::System::GetInstance().GPU().ProcessCommandList(va_addr, entry.sz); | 147 | |
| 147 | } | ||
| 148 | params.fence_out.id = 0; | 148 | params.fence_out.id = 0; |
| 149 | params.fence_out.value = 0; | 149 | params.fence_out.value = 0; |
| 150 | std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo)); | 150 | std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo)); |
| @@ -160,14 +160,12 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output) | |||
| 160 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", | 160 | LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", |
| 161 | params.address, params.num_entries, params.flags); | 161 | params.address, params.num_entries, params.flags); |
| 162 | 162 | ||
| 163 | std::vector<IoctlGpfifoEntry> entries(params.num_entries); | 163 | std::vector<Tegra::CommandListHeader> entries(params.num_entries); |
| 164 | Memory::ReadBlock(params.address, entries.data(), | 164 | Memory::ReadBlock(params.address, entries.data(), |
| 165 | params.num_entries * sizeof(IoctlGpfifoEntry)); | 165 | params.num_entries * sizeof(Tegra::CommandListHeader)); |
| 166 | |||
| 167 | Core::System::GetInstance().GPU().ProcessCommandLists(entries); | ||
| 166 | 168 | ||
| 167 | for (auto entry : entries) { | ||
| 168 | Tegra::GPUVAddr va_addr = entry.Address(); | ||
| 169 | Core::System::GetInstance().GPU().ProcessCommandList(va_addr, entry.sz); | ||
| 170 | } | ||
| 171 | params.fence_out.id = 0; | 169 | params.fence_out.id = 0; |
| 172 | params.fence_out.value = 0; | 170 | params.fence_out.value = 0; |
| 173 | std::memcpy(output.data(), &params, output.size()); | 171 | std::memcpy(output.data(), &params, output.size()); |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index 03b7356d0..baaefd79a 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h | |||
| @@ -10,7 +10,6 @@ | |||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "common/swap.h" | 11 | #include "common/swap.h" |
| 12 | #include "core/hle/service/nvdrv/devices/nvdevice.h" | 12 | #include "core/hle/service/nvdrv/devices/nvdevice.h" |
| 13 | #include "video_core/memory_manager.h" | ||
| 14 | 13 | ||
| 15 | namespace Service::Nvidia::Devices { | 14 | namespace Service::Nvidia::Devices { |
| 16 | 15 | ||
| @@ -151,22 +150,6 @@ private: | |||
| 151 | }; | 150 | }; |
| 152 | static_assert(sizeof(IoctlAllocObjCtx) == 16, "IoctlAllocObjCtx is incorrect size"); | 151 | static_assert(sizeof(IoctlAllocObjCtx) == 16, "IoctlAllocObjCtx is incorrect size"); |
| 153 | 152 | ||
| 154 | struct IoctlGpfifoEntry { | ||
| 155 | u32_le entry0; // gpu_va_lo | ||
| 156 | union { | ||
| 157 | u32_le entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F) | ||
| 158 | BitField<0, 8, u32_le> gpu_va_hi; | ||
| 159 | BitField<8, 2, u32_le> unk1; | ||
| 160 | BitField<10, 21, u32_le> sz; | ||
| 161 | BitField<31, 1, u32_le> unk2; | ||
| 162 | }; | ||
| 163 | |||
| 164 | Tegra::GPUVAddr Address() const { | ||
| 165 | return (static_cast<Tegra::GPUVAddr>(gpu_va_hi) << 32) | entry0; | ||
| 166 | } | ||
| 167 | }; | ||
| 168 | static_assert(sizeof(IoctlGpfifoEntry) == 8, "IoctlGpfifoEntry is incorrect size"); | ||
| 169 | |||
| 170 | struct IoctlSubmitGpfifo { | 153 | struct IoctlSubmitGpfifo { |
| 171 | u64_le address; // pointer to gpfifo entry structs | 154 | u64_le address; // pointer to gpfifo entry structs |
| 172 | u32_le num_entries; // number of fence objects being submitted | 155 | u32_le num_entries; // number of fence objects being submitted |
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index d5831e752..e0c277105 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -69,57 +69,64 @@ void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params) | |||
| 69 | } | 69 | } |
| 70 | } | 70 | } |
| 71 | 71 | ||
| 72 | void GPU::ProcessCommandList(GPUVAddr address, u32 size) { | 72 | MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB(128, 128, 192)); |
| 73 | const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address); | ||
| 74 | VAddr current_addr = *head_address; | ||
| 75 | while (current_addr < *head_address + size * sizeof(CommandHeader)) { | ||
| 76 | const CommandHeader header = {Memory::Read32(current_addr)}; | ||
| 77 | current_addr += sizeof(u32); | ||
| 78 | 73 | ||
| 79 | switch (header.mode.Value()) { | 74 | void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) { |
| 80 | case SubmissionMode::IncreasingOld: | 75 | MICROPROFILE_SCOPE(ProcessCommandLists); |
| 81 | case SubmissionMode::Increasing: { | 76 | for (auto entry : commands) { |
| 82 | // Increase the method value with each argument. | 77 | Tegra::GPUVAddr address = entry.Address(); |
| 83 | for (unsigned i = 0; i < header.arg_count; ++i) { | 78 | u32 size = entry.sz; |
| 84 | WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr), | 79 | const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address); |
| 85 | header.arg_count - i - 1); | 80 | VAddr current_addr = *head_address; |
| 86 | current_addr += sizeof(u32); | 81 | while (current_addr < *head_address + size * sizeof(CommandHeader)) { |
| 82 | const CommandHeader header = {Memory::Read32(current_addr)}; | ||
| 83 | current_addr += sizeof(u32); | ||
| 84 | |||
| 85 | switch (header.mode.Value()) { | ||
| 86 | case SubmissionMode::IncreasingOld: | ||
| 87 | case SubmissionMode::Increasing: { | ||
| 88 | // Increase the method value with each argument. | ||
| 89 | for (unsigned i = 0; i < header.arg_count; ++i) { | ||
| 90 | WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr), | ||
| 91 | header.arg_count - i - 1); | ||
| 92 | current_addr += sizeof(u32); | ||
| 93 | } | ||
| 94 | break; | ||
| 87 | } | 95 | } |
| 88 | break; | 96 | case SubmissionMode::NonIncreasingOld: |
| 89 | } | 97 | case SubmissionMode::NonIncreasing: { |
| 90 | case SubmissionMode::NonIncreasingOld: | 98 | // Use the same method value for all arguments. |
| 91 | case SubmissionMode::NonIncreasing: { | 99 | for (unsigned i = 0; i < header.arg_count; ++i) { |
| 92 | // Use the same method value for all arguments. | 100 | WriteReg(header.method, header.subchannel, Memory::Read32(current_addr), |
| 93 | for (unsigned i = 0; i < header.arg_count; ++i) { | 101 | header.arg_count - i - 1); |
| 94 | WriteReg(header.method, header.subchannel, Memory::Read32(current_addr), | 102 | current_addr += sizeof(u32); |
| 95 | header.arg_count - i - 1); | 103 | } |
| 96 | current_addr += sizeof(u32); | 104 | break; |
| 97 | } | 105 | } |
| 98 | break; | 106 | case SubmissionMode::IncreaseOnce: { |
| 99 | } | 107 | ASSERT(header.arg_count.Value() >= 1); |
| 100 | case SubmissionMode::IncreaseOnce: { | ||
| 101 | ASSERT(header.arg_count.Value() >= 1); | ||
| 102 | 108 | ||
| 103 | // Use the original method for the first argument and then the next method for all other | 109 | // Use the original method for the first argument and then the next method for all |
| 104 | // arguments. | 110 | // other arguments. |
| 105 | WriteReg(header.method, header.subchannel, Memory::Read32(current_addr), | 111 | WriteReg(header.method, header.subchannel, Memory::Read32(current_addr), |
| 106 | header.arg_count - 1); | 112 | header.arg_count - 1); |
| 107 | current_addr += sizeof(u32); | ||
| 108 | |||
| 109 | for (unsigned i = 1; i < header.arg_count; ++i) { | ||
| 110 | WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr), | ||
| 111 | header.arg_count - i - 1); | ||
| 112 | current_addr += sizeof(u32); | 113 | current_addr += sizeof(u32); |
| 114 | |||
| 115 | for (unsigned i = 1; i < header.arg_count; ++i) { | ||
| 116 | WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr), | ||
| 117 | header.arg_count - i - 1); | ||
| 118 | current_addr += sizeof(u32); | ||
| 119 | } | ||
| 120 | break; | ||
| 121 | } | ||
| 122 | case SubmissionMode::Inline: { | ||
| 123 | // The register value is stored in the bits 16-28 as an immediate | ||
| 124 | WriteReg(header.method, header.subchannel, header.inline_data, 0); | ||
| 125 | break; | ||
| 126 | } | ||
| 127 | default: | ||
| 128 | UNIMPLEMENTED(); | ||
| 113 | } | 129 | } |
| 114 | break; | ||
| 115 | } | ||
| 116 | case SubmissionMode::Inline: { | ||
| 117 | // The register value is stored in the bits 16-28 as an immediate | ||
| 118 | WriteReg(header.method, header.subchannel, header.inline_data, 0); | ||
| 119 | break; | ||
| 120 | } | ||
| 121 | default: | ||
| 122 | UNIMPLEMENTED(); | ||
| 123 | } | 130 | } |
| 124 | } | 131 | } |
| 125 | } | 132 | } |
diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h index a01153e0b..bd766e77a 100644 --- a/src/video_core/command_processor.h +++ b/src/video_core/command_processor.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <type_traits> | 7 | #include <type_traits> |
| 8 | #include "common/bit_field.h" | 8 | #include "common/bit_field.h" |
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/memory_manager.h" | ||
| 10 | 11 | ||
| 11 | namespace Tegra { | 12 | namespace Tegra { |
| 12 | 13 | ||
| @@ -19,6 +20,22 @@ enum class SubmissionMode : u32 { | |||
| 19 | IncreaseOnce = 5 | 20 | IncreaseOnce = 5 |
| 20 | }; | 21 | }; |
| 21 | 22 | ||
| 23 | struct CommandListHeader { | ||
| 24 | u32 entry0; // gpu_va_lo | ||
| 25 | union { | ||
| 26 | u32 entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F) | ||
| 27 | BitField<0, 8, u32> gpu_va_hi; | ||
| 28 | BitField<8, 2, u32> unk1; | ||
| 29 | BitField<10, 21, u32> sz; | ||
| 30 | BitField<31, 1, u32> unk2; | ||
| 31 | }; | ||
| 32 | |||
| 33 | GPUVAddr Address() const { | ||
| 34 | return (static_cast<GPUVAddr>(gpu_va_hi) << 32) | entry0; | ||
| 35 | } | ||
| 36 | }; | ||
| 37 | static_assert(sizeof(CommandListHeader) == 8, "CommandListHeader is incorrect size"); | ||
| 38 | |||
| 22 | union CommandHeader { | 39 | union CommandHeader { |
| 23 | u32 hex; | 40 | u32 hex; |
| 24 | 41 | ||
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index d29f31f52..9163fbdc6 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <memory> | 8 | #include <memory> |
| 9 | #include <vector> | ||
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 10 | #include "core/hle/service/nvflinger/buffer_queue.h" | 11 | #include "core/hle/service/nvflinger/buffer_queue.h" |
| 11 | #include "video_core/memory_manager.h" | 12 | #include "video_core/memory_manager.h" |
| @@ -67,6 +68,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format); | |||
| 67 | /// Returns the number of bytes per pixel of each depth format. | 68 | /// Returns the number of bytes per pixel of each depth format. |
| 68 | u32 DepthFormatBytesPerPixel(DepthFormat format); | 69 | u32 DepthFormatBytesPerPixel(DepthFormat format); |
| 69 | 70 | ||
| 71 | struct CommandListHeader; | ||
| 70 | class DebugContext; | 72 | class DebugContext; |
| 71 | 73 | ||
| 72 | /** | 74 | /** |
| @@ -115,7 +117,7 @@ public: | |||
| 115 | ~GPU(); | 117 | ~GPU(); |
| 116 | 118 | ||
| 117 | /// Processes a command list stored at the specified address in GPU memory. | 119 | /// Processes a command list stored at the specified address in GPU memory. |
| 118 | void ProcessCommandList(GPUVAddr address, u32 size); | 120 | void ProcessCommandLists(const std::vector<CommandListHeader>& commands); |
| 119 | 121 | ||
| 120 | /// Returns a reference to the Maxwell3D GPU engine. | 122 | /// Returns a reference to the Maxwell3D GPU engine. |
| 121 | Engines::Maxwell3D& Maxwell3D(); | 123 | Engines::Maxwell3D& Maxwell3D(); |