video_core: Move command buffer loop.

This moves the hot loop into video_core. This refactoring shall reduce the CPU overhead of calling ProcessCommandList.
author: Markus Wick 2018-09-06 15:48:08 +0200
committer: Markus Wick 2018-09-10 22:06:13 +0200
commit: 0cfb0bacb2581d79631f496afbc3a3d5dd19eb42 (patch)
tree: c6fdc90795dc4e1851e2b3e3bd792e48f19251ba
parent: rasterizer: Drop unused handler. (diff)
download: yuzu-0cfb0bacb2581d79631f496afbc3a3d5dd19eb42.tar.gz
yuzu-0cfb0bacb2581d79631f496afbc3a3d5dd19eb42.tar.xz
yuzu-0cfb0bacb2581d79631f496afbc3a3d5dd19eb42.zip
5 files changed, 84 insertions, 77 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 4cdf7f613..8e0f9a9e5 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -8,6 +8,7 @@
 #include "core/core.h"
 #include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
 #include "core/memory.h"
+#include "video_core/command_processor.h"
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
@@ -134,17 +135,16 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
    LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
                params.address, params.num_entries, params.flags);
-    ASSERT_MSG(input.size() ==
+    ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) +
-                   sizeof(IoctlSubmitGpfifo) + params.num_entries * sizeof(IoctlGpfifoEntry),
+                                   params.num_entries * sizeof(Tegra::CommandListHeader),
               "Incorrect input size");
-    std::vector<IoctlGpfifoEntry> entries(params.num_entries);
+    std::vector<Tegra::CommandListHeader> entries(params.num_entries);
    std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
-                params.num_entries * sizeof(IoctlGpfifoEntry));
+                params.num_entries * sizeof(Tegra::CommandListHeader));
-    for (auto entry : entries) {
-        Tegra::GPUVAddr va_addr = entry.Address();
+    Core::System::GetInstance().GPU().ProcessCommandLists(entries);
-        Core::System::GetInstance().GPU().ProcessCommandList(va_addr, entry.sz);
-    }
    params.fence_out.id = 0;
    params.fence_out.value = 0;
    std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
@@ -160,14 +160,12 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
    LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
                params.address, params.num_entries, params.flags);
-    std::vector<IoctlGpfifoEntry> entries(params.num_entries);
+    std::vector<Tegra::CommandListHeader> entries(params.num_entries);
    Memory::ReadBlock(params.address, entries.data(),
-                      params.num_entries * sizeof(IoctlGpfifoEntry));
+                      params.num_entries * sizeof(Tegra::CommandListHeader));
+    Core::System::GetInstance().GPU().ProcessCommandLists(entries);
-    for (auto entry : entries) {
-        Tegra::GPUVAddr va_addr = entry.Address();
-        Core::System::GetInstance().GPU().ProcessCommandList(va_addr, entry.sz);
-    }
    params.fence_out.id = 0;
    params.fence_out.value = 0;
    std::memcpy(output.data(), &params, output.size());
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index 03b7356d0..baaefd79a 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -10,7 +10,6 @@
 #include "common/common_types.h"
 #include "common/swap.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"
-#include "video_core/memory_manager.h"
 namespace Service::Nvidia::Devices {
@@ -151,22 +150,6 @@ private:
    };
    static_assert(sizeof(IoctlAllocObjCtx) == 16, "IoctlAllocObjCtx is incorrect size");
-    struct IoctlGpfifoEntry {
-        u32_le entry0; // gpu_va_lo
-        union {
-            u32_le entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F)
-            BitField<0, 8, u32_le> gpu_va_hi;
-            BitField<8, 2, u32_le> unk1;
-            BitField<10, 21, u32_le> sz;
-            BitField<31, 1, u32_le> unk2;
-        };
-        Tegra::GPUVAddr Address() const {
-            return (static_cast<Tegra::GPUVAddr>(gpu_va_hi) << 32) | entry0;
-        }
-    };
-    static_assert(sizeof(IoctlGpfifoEntry) == 8, "IoctlGpfifoEntry is incorrect size");
    struct IoctlSubmitGpfifo {
        u64_le address;     // pointer to gpfifo entry structs
        u32_le num_entries; // number of fence objects being submitted
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index d5831e752..e0c277105 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -69,57 +69,64 @@ void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params)
    }
 }
-void GPU::ProcessCommandList(GPUVAddr address, u32 size) {
+MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB(128, 128, 192));
-    const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
-    VAddr current_addr = *head_address;
-    while (current_addr < *head_address + size * sizeof(CommandHeader)) {
-        const CommandHeader header = {Memory::Read32(current_addr)};
-        current_addr += sizeof(u32);
-        switch (header.mode.Value()) {
+void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
-        case SubmissionMode::IncreasingOld:
+    MICROPROFILE_SCOPE(ProcessCommandLists);
-        case SubmissionMode::Increasing: {
+    for (auto entry : commands) {
-            // Increase the method value with each argument.
+        Tegra::GPUVAddr address = entry.Address();
-            for (unsigned i = 0; i < header.arg_count; ++i) {
+        u32 size = entry.sz;
-                WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr),
+        const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
-                         header.arg_count - i - 1);
+        VAddr current_addr = *head_address;
-                current_addr += sizeof(u32);
+        while (current_addr < *head_address + size * sizeof(CommandHeader)) {
+            const CommandHeader header = {Memory::Read32(current_addr)};
+            current_addr += sizeof(u32);
+            switch (header.mode.Value()) {
+            case SubmissionMode::IncreasingOld:
+            case SubmissionMode::Increasing: {
+                // Increase the method value with each argument.
+                for (unsigned i = 0; i < header.arg_count; ++i) {
+                    WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr),
+                             header.arg_count - i - 1);
+                    current_addr += sizeof(u32);
+                }
+                break;
            }
-            break;
+            case SubmissionMode::NonIncreasingOld:
-        }
+            case SubmissionMode::NonIncreasing: {
-        case SubmissionMode::NonIncreasingOld:
+                // Use the same method value for all arguments.
-        case SubmissionMode::NonIncreasing: {
+                for (unsigned i = 0; i < header.arg_count; ++i) {
-            // Use the same method value for all arguments.
+                    WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
-            for (unsigned i = 0; i < header.arg_count; ++i) {
+                             header.arg_count - i - 1);
-                WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
+                    current_addr += sizeof(u32);
-                         header.arg_count - i - 1);
+                }
-                current_addr += sizeof(u32);
+                break;
            }
-            break;
+            case SubmissionMode::IncreaseOnce: {
-        }
+                ASSERT(header.arg_count.Value() >= 1);
-        case SubmissionMode::IncreaseOnce: {
-            ASSERT(header.arg_count.Value() >= 1);
-            // Use the original method for the first argument and then the next method for all other
+                // Use the original method for the first argument and then the next method for all
-            // arguments.
+                // other arguments.
-            WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
+                WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
-                     header.arg_count - 1);
+                         header.arg_count - 1);
-            current_addr += sizeof(u32);
-            for (unsigned i = 1; i < header.arg_count; ++i) {
-                WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr),
-                         header.arg_count - i - 1);
                current_addr += sizeof(u32);
+                for (unsigned i = 1; i < header.arg_count; ++i) {
+                    WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr),
+                             header.arg_count - i - 1);
+                    current_addr += sizeof(u32);
+                }
+                break;
+            }
+            case SubmissionMode::Inline: {
+                // The register value is stored in the bits 16-28 as an immediate
+                WriteReg(header.method, header.subchannel, header.inline_data, 0);
+                break;
+            }
+            default:
+                UNIMPLEMENTED();
            }
-            break;
-        }
-        case SubmissionMode::Inline: {
-            // The register value is stored in the bits 16-28 as an immediate
-            WriteReg(header.method, header.subchannel, header.inline_data, 0);
-            break;
-        }
-        default:
-            UNIMPLEMENTED();
        }
    }
 }
diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h
index a01153e0b..bd766e77a 100644
--- a/src/video_core/command_processor.h
+++ b/src/video_core/command_processor.h
@@ -7,6 +7,7 @@
 #include <type_traits>
 #include "common/bit_field.h"
 #include "common/common_types.h"
+#include "video_core/memory_manager.h"
 namespace Tegra {
@@ -19,6 +20,22 @@ enum class SubmissionMode : u32 {
    IncreaseOnce = 5
 };
+struct CommandListHeader {
+    u32 entry0; // gpu_va_lo
+    union {
+        u32 entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F)
+        BitField<0, 8, u32> gpu_va_hi;
+        BitField<8, 2, u32> unk1;
+        BitField<10, 21, u32> sz;
+        BitField<31, 1, u32> unk2;
+    };
+    GPUVAddr Address() const {
+        return (static_cast<GPUVAddr>(gpu_va_hi) << 32) | entry0;
+    }
+};
+static_assert(sizeof(CommandListHeader) == 8, "CommandListHeader is incorrect size");
 union CommandHeader {
    u32 hex;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index d29f31f52..9163fbdc6 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -6,6 +6,7 @@
 #include <array>
 #include <memory>
+#include <vector>
 #include "common/common_types.h"
 #include "core/hle/service/nvflinger/buffer_queue.h"
 #include "video_core/memory_manager.h"
@@ -67,6 +68,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format);
 /// Returns the number of bytes per pixel of each depth format.
 u32 DepthFormatBytesPerPixel(DepthFormat format);
+struct CommandListHeader;
 class DebugContext;
 /**
@@ -115,7 +117,7 @@ public:
    ~GPU();
    /// Processes a command list stored at the specified address in GPU memory.
-    void ProcessCommandList(GPUVAddr address, u32 size);
+    void ProcessCommandLists(const std::vector<CommandListHeader>& commands);
    /// Returns a reference to the Maxwell3D GPU engine.
    Engines::Maxwell3D& Maxwell3D();
author	Markus Wick	2018-09-06 15:48:08 +0200
committer	Markus Wick	2018-09-10 22:06:13 +0200
commit	0cfb0bacb2581d79631f496afbc3a3d5dd19eb42 (patch)
tree	c6fdc90795dc4e1851e2b3e3bd792e48f19251ba
parent	rasterizer: Drop unused handler. (diff)
download	yuzu-0cfb0bacb2581d79631f496afbc3a3d5dd19eb42.tar.gz yuzu-0cfb0bacb2581d79631f496afbc3a3d5dd19eb42.tar.xz yuzu-0cfb0bacb2581d79631f496afbc3a3d5dd19eb42.zip

diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 4cdf7f613..8e0f9a9e5 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -8,6 +8,7 @@
8	#include "core/core.h"	8	#include "core/core.h"
9	#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"	9	#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
10	#include "core/memory.h"	10	#include "core/memory.h"
		11	#include "video_core/command_processor.h"
11	#include "video_core/gpu.h"	12	#include "video_core/gpu.h"
12	#include "video_core/memory_manager.h"	13	#include "video_core/memory_manager.h"
13		14
@@ -134,17 +135,16 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
134	LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",	135	LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
135	params.address, params.num_entries, params.flags);	136	params.address, params.num_entries, params.flags);
136		137
137	ASSERT_MSG(input.size() ==	138	ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) +
138	sizeof(IoctlSubmitGpfifo) + params.num_entries * sizeof(IoctlGpfifoEntry),	139	params.num_entries * sizeof(Tegra::CommandListHeader),
139	"Incorrect input size");	140	"Incorrect input size");
140		141
141	std::vector<IoctlGpfifoEntry> entries(params.num_entries);	142	std::vector<Tegra::CommandListHeader> entries(params.num_entries);
142	std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],	143	std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
143	params.num_entries * sizeof(IoctlGpfifoEntry));	144	params.num_entries * sizeof(Tegra::CommandListHeader));
144	for (auto entry : entries) {	145
145	Tegra::GPUVAddr va_addr = entry.Address();	146	Core::System::GetInstance().GPU().ProcessCommandLists(entries);
146	Core::System::GetInstance().GPU().ProcessCommandList(va_addr, entry.sz);	147
147	}
148	params.fence_out.id = 0;	148	params.fence_out.id = 0;
149	params.fence_out.value = 0;	149	params.fence_out.value = 0;
150	std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));	150	std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
@@ -160,14 +160,12 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
160	LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",	160	LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
161	params.address, params.num_entries, params.flags);	161	params.address, params.num_entries, params.flags);
162		162
163	std::vector<IoctlGpfifoEntry> entries(params.num_entries);	163	std::vector<Tegra::CommandListHeader> entries(params.num_entries);
164	Memory::ReadBlock(params.address, entries.data(),	164	Memory::ReadBlock(params.address, entries.data(),
165	params.num_entries * sizeof(IoctlGpfifoEntry));	165	params.num_entries * sizeof(Tegra::CommandListHeader));
		166
		167	Core::System::GetInstance().GPU().ProcessCommandLists(entries);
166		168
167	for (auto entry : entries) {
168	Tegra::GPUVAddr va_addr = entry.Address();
169	Core::System::GetInstance().GPU().ProcessCommandList(va_addr, entry.sz);
170	}
171	params.fence_out.id = 0;	169	params.fence_out.id = 0;
172	params.fence_out.value = 0;	170	params.fence_out.value = 0;
173	std::memcpy(output.data(), &params, output.size());	171	std::memcpy(output.data(), &params, output.size());


diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index 03b7356d0..baaefd79a 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -10,7 +10,6 @@
10	#include "common/common_types.h"	10	#include "common/common_types.h"
11	#include "common/swap.h"	11	#include "common/swap.h"
12	#include "core/hle/service/nvdrv/devices/nvdevice.h"	12	#include "core/hle/service/nvdrv/devices/nvdevice.h"
13	#include "video_core/memory_manager.h"
14		13
15	namespace Service::Nvidia::Devices {	14	namespace Service::Nvidia::Devices {
16		15
@@ -151,22 +150,6 @@ private:
151	};	150	};
152	static_assert(sizeof(IoctlAllocObjCtx) == 16, "IoctlAllocObjCtx is incorrect size");	151	static_assert(sizeof(IoctlAllocObjCtx) == 16, "IoctlAllocObjCtx is incorrect size");
153		152
154	struct IoctlGpfifoEntry {
155	u32_le entry0; // gpu_va_lo
156	union {
157	u32_le entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F)
158	BitField<0, 8, u32_le> gpu_va_hi;
159	BitField<8, 2, u32_le> unk1;
160	BitField<10, 21, u32_le> sz;
161	BitField<31, 1, u32_le> unk2;
162	};
163
164	Tegra::GPUVAddr Address() const {
165	return (static_cast<Tegra::GPUVAddr>(gpu_va_hi) << 32) | entry0;
166	}
167	};
168	static_assert(sizeof(IoctlGpfifoEntry) == 8, "IoctlGpfifoEntry is incorrect size");
169
170	struct IoctlSubmitGpfifo {	153	struct IoctlSubmitGpfifo {
171	u64_le address; // pointer to gpfifo entry structs	154	u64_le address; // pointer to gpfifo entry structs
172	u32_le num_entries; // number of fence objects being submitted	155	u32_le num_entries; // number of fence objects being submitted


diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index d5831e752..e0c277105 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp
@@ -69,57 +69,64 @@ void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params)
69	}	69	}
70	}	70	}
71		71
72	void GPU::ProcessCommandList(GPUVAddr address, u32 size) {	72	MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB(128, 128, 192));
73	const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
74	VAddr current_addr = *head_address;
75	while (current_addr < *head_address + size * sizeof(CommandHeader)) {
76	const CommandHeader header = {Memory::Read32(current_addr)};
77	current_addr += sizeof(u32);
78		73
79	switch (header.mode.Value()) {	74	void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
80	case SubmissionMode::IncreasingOld:	75	MICROPROFILE_SCOPE(ProcessCommandLists);
81	case SubmissionMode::Increasing: {	76	for (auto entry : commands) {
82	// Increase the method value with each argument.	77	Tegra::GPUVAddr address = entry.Address();
83	for (unsigned i = 0; i < header.arg_count; ++i) {	78	u32 size = entry.sz;
84	WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr),	79	const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
85	header.arg_count - i - 1);	80	VAddr current_addr = *head_address;
86	current_addr += sizeof(u32);	81	while (current_addr < *head_address + size * sizeof(CommandHeader)) {
		82	const CommandHeader header = {Memory::Read32(current_addr)};
		83	current_addr += sizeof(u32);
		84
		85	switch (header.mode.Value()) {
		86	case SubmissionMode::IncreasingOld:
		87	case SubmissionMode::Increasing: {
		88	// Increase the method value with each argument.
		89	for (unsigned i = 0; i < header.arg_count; ++i) {
		90	WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr),
		91	header.arg_count - i - 1);
		92	current_addr += sizeof(u32);
		93	}
		94	break;
87	}	95	}
88	break;	96	case SubmissionMode::NonIncreasingOld:
89	}	97	case SubmissionMode::NonIncreasing: {
90	case SubmissionMode::NonIncreasingOld:	98	// Use the same method value for all arguments.
91	case SubmissionMode::NonIncreasing: {	99	for (unsigned i = 0; i < header.arg_count; ++i) {
92	// Use the same method value for all arguments.	100	WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
93	for (unsigned i = 0; i < header.arg_count; ++i) {	101	header.arg_count - i - 1);
94	WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),	102	current_addr += sizeof(u32);
95	header.arg_count - i - 1);	103	}
96	current_addr += sizeof(u32);	104	break;
97	}	105	}
98	break;	106	case SubmissionMode::IncreaseOnce: {
99	}	107	ASSERT(header.arg_count.Value() >= 1);
100	case SubmissionMode::IncreaseOnce: {
101	ASSERT(header.arg_count.Value() >= 1);
102		108
103	// Use the original method for the first argument and then the next method for all other	109	// Use the original method for the first argument and then the next method for all
104	// arguments.	110	// other arguments.
105	WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),	111	WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
106	header.arg_count - 1);	112	header.arg_count - 1);
107	current_addr += sizeof(u32);
108
109	for (unsigned i = 1; i < header.arg_count; ++i) {
110	WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr),
111	header.arg_count - i - 1);
112	current_addr += sizeof(u32);	113	current_addr += sizeof(u32);
		114
		115	for (unsigned i = 1; i < header.arg_count; ++i) {
		116	WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr),
		117	header.arg_count - i - 1);
		118	current_addr += sizeof(u32);
		119	}
		120	break;
		121	}
		122	case SubmissionMode::Inline: {
		123	// The register value is stored in the bits 16-28 as an immediate
		124	WriteReg(header.method, header.subchannel, header.inline_data, 0);
		125	break;
		126	}
		127	default:
		128	UNIMPLEMENTED();
113	}	129	}
114	break;
115	}
116	case SubmissionMode::Inline: {
117	// The register value is stored in the bits 16-28 as an immediate
118	WriteReg(header.method, header.subchannel, header.inline_data, 0);
119	break;
120	}
121	default:
122	UNIMPLEMENTED();
123	}	130	}
124	}	131	}
125	}	132	}


diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h index a01153e0b..bd766e77a 100644 --- a/src/video_core/command_processor.h +++ b/src/video_core/command_processor.h
@@ -7,6 +7,7 @@
7	#include <type_traits>	7	#include <type_traits>
8	#include "common/bit_field.h"	8	#include "common/bit_field.h"
9	#include "common/common_types.h"	9	#include "common/common_types.h"
		10	#include "video_core/memory_manager.h"
10		11
11	namespace Tegra {	12	namespace Tegra {
12		13
@@ -19,6 +20,22 @@ enum class SubmissionMode : u32 {
19	IncreaseOnce = 5	20	IncreaseOnce = 5
20	};	21	};
21		22
		23	struct CommandListHeader {
		24	u32 entry0; // gpu_va_lo
		25	union {
		26	u32 entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F)
		27	BitField<0, 8, u32> gpu_va_hi;
		28	BitField<8, 2, u32> unk1;
		29	BitField<10, 21, u32> sz;
		30	BitField<31, 1, u32> unk2;
		31	};
		32
		33	GPUVAddr Address() const {
		34	return (static_cast<GPUVAddr>(gpu_va_hi) << 32) | entry0;
		35	}
		36	};
		37	static_assert(sizeof(CommandListHeader) == 8, "CommandListHeader is incorrect size");
		38
22	union CommandHeader {	39	union CommandHeader {
23	u32 hex;	40	u32 hex;
24		41


diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index d29f31f52..9163fbdc6 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h
@@ -6,6 +6,7 @@
6		6
7	#include <array>	7	#include <array>
8	#include <memory>	8	#include <memory>
		9	#include <vector>
9	#include "common/common_types.h"	10	#include "common/common_types.h"
10	#include "core/hle/service/nvflinger/buffer_queue.h"	11	#include "core/hle/service/nvflinger/buffer_queue.h"
11	#include "video_core/memory_manager.h"	12	#include "video_core/memory_manager.h"
@@ -67,6 +68,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format);
67	/// Returns the number of bytes per pixel of each depth format.	68	/// Returns the number of bytes per pixel of each depth format.
68	u32 DepthFormatBytesPerPixel(DepthFormat format);	69	u32 DepthFormatBytesPerPixel(DepthFormat format);
69		70
		71	struct CommandListHeader;
70	class DebugContext;	72	class DebugContext;
71		73
72	/**	74	/**
@@ -115,7 +117,7 @@ public:
115	~GPU();	117	~GPU();
116		118
117	/// Processes a command list stored at the specified address in GPU memory.	119	/// Processes a command list stored at the specified address in GPU memory.
118	void ProcessCommandList(GPUVAddr address, u32 size);	120	void ProcessCommandLists(const std::vector<CommandListHeader>& commands);
119		121
120	/// Returns a reference to the Maxwell3D GPU engine.	122	/// Returns a reference to the Maxwell3D GPU engine.
121	Engines::Maxwell3D& Maxwell3D();	123	Engines::Maxwell3D& Maxwell3D();