summaryrefslogtreecommitdiff
path: root/src/video_core/engines
diff options
context:
space:
mode:
authorGravatar Fernando Sahmkow2023-08-27 02:58:00 +0200
committerGravatar Fernando Sahmkow2023-08-27 04:26:22 +0200
commit115792158d3ac4ca746d1775f2381e8f8dd18582 (patch)
treefec8995dd2a887068625e9d1278d0562bee6a8cb /src/video_core/engines
parentShader Recompiler: Auto stub special registers and dump pipelines on exception. (diff)
downloadyuzu-115792158d3ac4ca746d1775f2381e8f8dd18582.tar.gz
yuzu-115792158d3ac4ca746d1775f2381e8f8dd18582.tar.xz
yuzu-115792158d3ac4ca746d1775f2381e8f8dd18582.zip
VideoCore: Implement DispatchIndirect
Diffstat (limited to 'src/video_core/engines')
-rw-r--r--src/video_core/engines/engine_interface.h8
-rw-r--r--src/video_core/engines/engine_upload.h8
-rw-r--r--src/video_core/engines/kepler_compute.cpp20
-rw-r--r--src/video_core/engines/kepler_compute.h17
-rw-r--r--src/video_core/engines/puller.cpp15
5 files changed, 62 insertions, 6 deletions
diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h
index 392322358..54631ee6c 100644
--- a/src/video_core/engines/engine_interface.h
+++ b/src/video_core/engines/engine_interface.h
@@ -11,6 +11,14 @@
11 11
12namespace Tegra::Engines { 12namespace Tegra::Engines {
13 13
14enum class EngineTypes : u32 {
15 KeplerCompute,
16 Maxwell3D,
17 Fermi2D,
18 MaxwellDMA,
19 KeplerMemory,
20};
21
14class EngineInterface { 22class EngineInterface {
15public: 23public:
16 virtual ~EngineInterface() = default; 24 virtual ~EngineInterface() = default;
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
index 7242d2529..21bf8aeb4 100644
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
@@ -69,6 +69,14 @@ public:
69 /// Binds a rasterizer to this engine. 69 /// Binds a rasterizer to this engine.
70 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); 70 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
71 71
72 GPUVAddr ExecTargetAddress() const {
73 return regs.dest.Address();
74 }
75
76 u32 GetUploadSize() const {
77 return copy_size;
78 }
79
72private: 80private:
73 void ProcessData(std::span<const u8> read_buffer); 81 void ProcessData(std::span<const u8> read_buffer);
74 82
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index a38d9528a..cd61ab222 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -43,16 +43,33 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal
43 43
44 switch (method) { 44 switch (method) {
45 case KEPLER_COMPUTE_REG_INDEX(exec_upload): { 45 case KEPLER_COMPUTE_REG_INDEX(exec_upload): {
46 UploadInfo info{.upload_address = upload_address,
47 .exec_address = upload_state.ExecTargetAddress(),
48 .copy_size = upload_state.GetUploadSize()};
49 uploads.push_back(info);
46 upload_state.ProcessExec(regs.exec_upload.linear != 0); 50 upload_state.ProcessExec(regs.exec_upload.linear != 0);
47 break; 51 break;
48 } 52 }
49 case KEPLER_COMPUTE_REG_INDEX(data_upload): { 53 case KEPLER_COMPUTE_REG_INDEX(data_upload): {
54 upload_address = current_dma_segment;
50 upload_state.ProcessData(method_argument, is_last_call); 55 upload_state.ProcessData(method_argument, is_last_call);
51 break; 56 break;
52 } 57 }
53 case KEPLER_COMPUTE_REG_INDEX(launch): 58 case KEPLER_COMPUTE_REG_INDEX(launch): {
59 const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
60
61 for (auto& data : uploads) {
62 const GPUVAddr offset = data.exec_address - launch_desc_loc;
63 if (offset / sizeof(u32) == LAUNCH_REG_INDEX(grid_dim_x) &&
64 memory_manager.IsMemoryDirty(data.upload_address, data.copy_size)) {
65 indirect_compute = {data.upload_address};
66 }
67 }
68 uploads.clear();
54 ProcessLaunch(); 69 ProcessLaunch();
70 indirect_compute = std::nullopt;
55 break; 71 break;
72 }
56 default: 73 default:
57 break; 74 break;
58 } 75 }
@@ -62,6 +79,7 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun
62 u32 methods_pending) { 79 u32 methods_pending) {
63 switch (method) { 80 switch (method) {
64 case KEPLER_COMPUTE_REG_INDEX(data_upload): 81 case KEPLER_COMPUTE_REG_INDEX(data_upload):
82 upload_address = current_dma_segment;
65 upload_state.ProcessData(base_start, amount); 83 upload_state.ProcessData(base_start, amount);
66 return; 84 return;
67 default: 85 default:
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 2092e685f..735e05fb4 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -5,6 +5,7 @@
5 5
6#include <array> 6#include <array>
7#include <cstddef> 7#include <cstddef>
8#include <optional>
8#include <vector> 9#include <vector>
9#include "common/bit_field.h" 10#include "common/bit_field.h"
10#include "common/common_funcs.h" 11#include "common/common_funcs.h"
@@ -36,6 +37,9 @@ namespace Tegra::Engines {
36#define KEPLER_COMPUTE_REG_INDEX(field_name) \ 37#define KEPLER_COMPUTE_REG_INDEX(field_name) \
37 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) 38 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
38 39
40#define LAUNCH_REG_INDEX(field_name) \
41 (offsetof(Tegra::Engines::KeplerCompute::LaunchParams, field_name) / sizeof(u32))
42
39class KeplerCompute final : public EngineInterface { 43class KeplerCompute final : public EngineInterface {
40public: 44public:
41 explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); 45 explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager);
@@ -201,6 +205,10 @@ public:
201 void CallMultiMethod(u32 method, const u32* base_start, u32 amount, 205 void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
202 u32 methods_pending) override; 206 u32 methods_pending) override;
203 207
208 std::optional<GPUVAddr> GetIndirectComputeAddress() const {
209 return indirect_compute;
210 }
211
204private: 212private:
205 void ProcessLaunch(); 213 void ProcessLaunch();
206 214
@@ -216,6 +224,15 @@ private:
216 MemoryManager& memory_manager; 224 MemoryManager& memory_manager;
217 VideoCore::RasterizerInterface* rasterizer = nullptr; 225 VideoCore::RasterizerInterface* rasterizer = nullptr;
218 Upload::State upload_state; 226 Upload::State upload_state;
227 GPUVAddr upload_address;
228
229 struct UploadInfo {
230 GPUVAddr upload_address;
231 GPUVAddr exec_address;
232 u32 copy_size;
233 };
234 std::vector<UploadInfo> uploads;
235 std::optional<GPUVAddr> indirect_compute{};
219}; 236};
220 237
221#define ASSERT_REG_POSITION(field_name, position) \ 238#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp
index 7718a09b3..6de2543b7 100644
--- a/src/video_core/engines/puller.cpp
+++ b/src/video_core/engines/puller.cpp
@@ -34,19 +34,24 @@ void Puller::ProcessBindMethod(const MethodCall& method_call) {
34 bound_engines[method_call.subchannel] = engine_id; 34 bound_engines[method_call.subchannel] = engine_id;
35 switch (engine_id) { 35 switch (engine_id) {
36 case EngineID::FERMI_TWOD_A: 36 case EngineID::FERMI_TWOD_A:
37 dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel); 37 dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel,
38 EngineTypes::Fermi2D);
38 break; 39 break;
39 case EngineID::MAXWELL_B: 40 case EngineID::MAXWELL_B:
40 dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel); 41 dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel,
42 EngineTypes::Maxwell3D);
41 break; 43 break;
42 case EngineID::KEPLER_COMPUTE_B: 44 case EngineID::KEPLER_COMPUTE_B:
43 dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel); 45 dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel,
46 EngineTypes::KeplerCompute);
44 break; 47 break;
45 case EngineID::MAXWELL_DMA_COPY_A: 48 case EngineID::MAXWELL_DMA_COPY_A:
46 dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel); 49 dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel,
50 EngineTypes::MaxwellDMA);
47 break; 51 break;
48 case EngineID::KEPLER_INLINE_TO_MEMORY_B: 52 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
49 dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel); 53 dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel,
54 EngineTypes::KeplerMemory);
50 break; 55 break;
51 default: 56 default:
52 UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id); 57 UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);