summary | refs | log | tree | commit | diff
path: root/src/video_core
diff options
context:
space:
mode:
author: Markus Wick, 2018-09-06 15:48:08 +0200
committer: Markus Wick, 2018-09-10 22:06:13 +0200
commit: 0cfb0bacb2581d79631f496afbc3a3d5dd19eb42 (patch)
tree: c6fdc90795dc4e1851e2b3e3bd792e48f19251ba /src/video_core
parent: rasterizer: Drop unused handler. (diff)
download: yuzu-0cfb0bacb2581d79631f496afbc3a3d5dd19eb42.tar.gz
yuzu-0cfb0bacb2581d79631f496afbc3a3d5dd19eb42.tar.xz
yuzu-0cfb0bacb2581d79631f496afbc3a3d5dd19eb42.zip
video_core: Move command buffer loop.
This moves the hot command-buffer processing loop into video_core. The refactoring should reduce the CPU overhead of calling ProcessCommandList.
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/command_processor.cpp 97
-rw-r--r-- src/video_core/command_processor.h 17
-rw-r--r-- src/video_core/gpu.h 4
3 files changed, 72 insertions, 46 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index d5831e752..e0c277105 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -69,57 +69,64 @@ void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params)
69 } 69 }
70} 70}
71 71
72void GPU::ProcessCommandList(GPUVAddr address, u32 size) { 72MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB(128, 128, 192));
73 const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
74 VAddr current_addr = *head_address;
75 while (current_addr < *head_address + size * sizeof(CommandHeader)) {
76 const CommandHeader header = {Memory::Read32(current_addr)};
77 current_addr += sizeof(u32);
78 73
79 switch (header.mode.Value()) { 74void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
80 case SubmissionMode::IncreasingOld: 75 MICROPROFILE_SCOPE(ProcessCommandLists);
81 case SubmissionMode::Increasing: { 76 for (auto entry : commands) {
82 // Increase the method value with each argument. 77 Tegra::GPUVAddr address = entry.Address();
83 for (unsigned i = 0; i < header.arg_count; ++i) { 78 u32 size = entry.sz;
84 WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr), 79 const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
85 header.arg_count - i - 1); 80 VAddr current_addr = *head_address;
86 current_addr += sizeof(u32); 81 while (current_addr < *head_address + size * sizeof(CommandHeader)) {
82 const CommandHeader header = {Memory::Read32(current_addr)};
83 current_addr += sizeof(u32);
84
85 switch (header.mode.Value()) {
86 case SubmissionMode::IncreasingOld:
87 case SubmissionMode::Increasing: {
88 // Increase the method value with each argument.
89 for (unsigned i = 0; i < header.arg_count; ++i) {
90 WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr),
91 header.arg_count - i - 1);
92 current_addr += sizeof(u32);
93 }
94 break;
87 } 95 }
88 break; 96 case SubmissionMode::NonIncreasingOld:
89 } 97 case SubmissionMode::NonIncreasing: {
90 case SubmissionMode::NonIncreasingOld: 98 // Use the same method value for all arguments.
91 case SubmissionMode::NonIncreasing: { 99 for (unsigned i = 0; i < header.arg_count; ++i) {
92 // Use the same method value for all arguments. 100 WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
93 for (unsigned i = 0; i < header.arg_count; ++i) { 101 header.arg_count - i - 1);
94 WriteReg(header.method, header.subchannel, Memory::Read32(current_addr), 102 current_addr += sizeof(u32);
95 header.arg_count - i - 1); 103 }
96 current_addr += sizeof(u32); 104 break;
97 } 105 }
98 break; 106 case SubmissionMode::IncreaseOnce: {
99 } 107 ASSERT(header.arg_count.Value() >= 1);
100 case SubmissionMode::IncreaseOnce: {
101 ASSERT(header.arg_count.Value() >= 1);
102 108
103 // Use the original method for the first argument and then the next method for all other 109 // Use the original method for the first argument and then the next method for all
104 // arguments. 110 // other arguments.
105 WriteReg(header.method, header.subchannel, Memory::Read32(current_addr), 111 WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
106 header.arg_count - 1); 112 header.arg_count - 1);
107 current_addr += sizeof(u32);
108
109 for (unsigned i = 1; i < header.arg_count; ++i) {
110 WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr),
111 header.arg_count - i - 1);
112 current_addr += sizeof(u32); 113 current_addr += sizeof(u32);
114
115 for (unsigned i = 1; i < header.arg_count; ++i) {
116 WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr),
117 header.arg_count - i - 1);
118 current_addr += sizeof(u32);
119 }
120 break;
121 }
122 case SubmissionMode::Inline: {
123 // The register value is stored in the bits 16-28 as an immediate
124 WriteReg(header.method, header.subchannel, header.inline_data, 0);
125 break;
126 }
127 default:
128 UNIMPLEMENTED();
113 } 129 }
114 break;
115 }
116 case SubmissionMode::Inline: {
117 // The register value is stored in the bits 16-28 as an immediate
118 WriteReg(header.method, header.subchannel, header.inline_data, 0);
119 break;
120 }
121 default:
122 UNIMPLEMENTED();
123 } 130 }
124 } 131 }
125} 132}
diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h
index a01153e0b..bd766e77a 100644
--- a/src/video_core/command_processor.h
+++ b/src/video_core/command_processor.h
@@ -7,6 +7,7 @@
7#include <type_traits> 7#include <type_traits>
8#include "common/bit_field.h" 8#include "common/bit_field.h"
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/memory_manager.h"
10 11
11namespace Tegra { 12namespace Tegra {
12 13
@@ -19,6 +20,22 @@ enum class SubmissionMode : u32 {
19 IncreaseOnce = 5 20 IncreaseOnce = 5
20}; 21};
21 22
23struct CommandListHeader {
24 u32 entry0; // gpu_va_lo
25 union {
26 u32 entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F)
27 BitField<0, 8, u32> gpu_va_hi;
28 BitField<8, 2, u32> unk1;
29 BitField<10, 21, u32> sz;
30 BitField<31, 1, u32> unk2;
31 };
32
33 GPUVAddr Address() const {
34 return (static_cast<GPUVAddr>(gpu_va_hi) << 32) | entry0;
35 }
36};
37static_assert(sizeof(CommandListHeader) == 8, "CommandListHeader is incorrect size");
38
22union CommandHeader { 39union CommandHeader {
23 u32 hex; 40 u32 hex;
24 41
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index d29f31f52..9163fbdc6 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -6,6 +6,7 @@
6 6
7#include <array> 7#include <array>
8#include <memory> 8#include <memory>
9#include <vector>
9#include "common/common_types.h" 10#include "common/common_types.h"
10#include "core/hle/service/nvflinger/buffer_queue.h" 11#include "core/hle/service/nvflinger/buffer_queue.h"
11#include "video_core/memory_manager.h" 12#include "video_core/memory_manager.h"
@@ -67,6 +68,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format);
67/// Returns the number of bytes per pixel of each depth format. 68/// Returns the number of bytes per pixel of each depth format.
68u32 DepthFormatBytesPerPixel(DepthFormat format); 69u32 DepthFormatBytesPerPixel(DepthFormat format);
69 70
71struct CommandListHeader;
70class DebugContext; 72class DebugContext;
71 73
72/** 74/**
@@ -115,7 +117,7 @@ public:
115 ~GPU(); 117 ~GPU();
116 118
117 /// Processes a command list stored at the specified address in GPU memory. 119 /// Processes a command list stored at the specified address in GPU memory.
118 void ProcessCommandList(GPUVAddr address, u32 size); 120 void ProcessCommandLists(const std::vector<CommandListHeader>& commands);
119 121
120 /// Returns a reference to the Maxwell3D GPU engine. 122 /// Returns a reference to the Maxwell3D GPU engine.
121 Engines::Maxwell3D& Maxwell3D(); 123 Engines::Maxwell3D& Maxwell3D();