diff options
| author | 2023-08-27 02:58:00 +0200 | |
|---|---|---|
| committer | 2023-08-27 04:26:22 +0200 | |
| commit | 115792158d3ac4ca746d1775f2381e8f8dd18582 (patch) | |
| tree | fec8995dd2a887068625e9d1278d0562bee6a8cb /src/video_core/engines | |
| parent | Shader Recompiler: Auto stub special registers and dump pipelines on exception. (diff) | |
| download | yuzu-115792158d3ac4ca746d1775f2381e8f8dd18582.tar.gz yuzu-115792158d3ac4ca746d1775f2381e8f8dd18582.tar.xz yuzu-115792158d3ac4ca746d1775f2381e8f8dd18582.zip | |
VideoCore: Implement DispatchIndirect
Diffstat (limited to 'src/video_core/engines')
| -rw-r--r-- | src/video_core/engines/engine_interface.h | 8 | ||||
| -rw-r--r-- | src/video_core/engines/engine_upload.h | 8 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 20 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.h | 17 | ||||
| -rw-r--r-- | src/video_core/engines/puller.cpp | 15 |
5 files changed, 62 insertions, 6 deletions
diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h
index 392322358..54631ee6c 100644
--- a/src/video_core/engines/engine_interface.h
+++ b/src/video_core/engines/engine_interface.h
| @@ -11,6 +11,14 @@ | |||
| 11 | 11 | ||
| 12 | namespace Tegra::Engines { | 12 | namespace Tegra::Engines { |
| 13 | 13 | ||
| 14 | enum class EngineTypes : u32 { | ||
| 15 | KeplerCompute, | ||
| 16 | Maxwell3D, | ||
| 17 | Fermi2D, | ||
| 18 | MaxwellDMA, | ||
| 19 | KeplerMemory, | ||
| 20 | }; | ||
| 21 | |||
| 14 | class EngineInterface { | 22 | class EngineInterface { |
| 15 | public: | 23 | public: |
| 16 | virtual ~EngineInterface() = default; | 24 | virtual ~EngineInterface() = default; |
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
index 7242d2529..21bf8aeb4 100644
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
| @@ -69,6 +69,14 @@ public: | |||
| 69 | /// Binds a rasterizer to this engine. | 69 | /// Binds a rasterizer to this engine. |
| 70 | void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); | 70 | void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); |
| 71 | 71 | ||
| 72 | GPUVAddr ExecTargetAddress() const { | ||
| 73 | return regs.dest.Address(); | ||
| 74 | } | ||
| 75 | |||
| 76 | u32 GetUploadSize() const { | ||
| 77 | return copy_size; | ||
| 78 | } | ||
| 79 | |||
| 72 | private: | 80 | private: |
| 73 | void ProcessData(std::span<const u8> read_buffer); | 81 | void ProcessData(std::span<const u8> read_buffer); |
| 74 | 82 | ||
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index a38d9528a..cd61ab222 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
| @@ -43,16 +43,33 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal | |||
| 43 | 43 | ||
| 44 | switch (method) { | 44 | switch (method) { |
| 45 | case KEPLER_COMPUTE_REG_INDEX(exec_upload): { | 45 | case KEPLER_COMPUTE_REG_INDEX(exec_upload): { |
| 46 | UploadInfo info{.upload_address = upload_address, | ||
| 47 | .exec_address = upload_state.ExecTargetAddress(), | ||
| 48 | .copy_size = upload_state.GetUploadSize()}; | ||
| 49 | uploads.push_back(info); | ||
| 46 | upload_state.ProcessExec(regs.exec_upload.linear != 0); | 50 | upload_state.ProcessExec(regs.exec_upload.linear != 0); |
| 47 | break; | 51 | break; |
| 48 | } | 52 | } |
| 49 | case KEPLER_COMPUTE_REG_INDEX(data_upload): { | 53 | case KEPLER_COMPUTE_REG_INDEX(data_upload): { |
| 54 | upload_address = current_dma_segment; | ||
| 50 | upload_state.ProcessData(method_argument, is_last_call); | 55 | upload_state.ProcessData(method_argument, is_last_call); |
| 51 | break; | 56 | break; |
| 52 | } | 57 | } |
| 53 | case KEPLER_COMPUTE_REG_INDEX(launch): | 58 | case KEPLER_COMPUTE_REG_INDEX(launch): { |
| 59 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); | ||
| 60 | |||
| 61 | for (auto& data : uploads) { | ||
| 62 | const GPUVAddr offset = data.exec_address - launch_desc_loc; | ||
| 63 | if (offset / sizeof(u32) == LAUNCH_REG_INDEX(grid_dim_x) && | ||
| 64 | memory_manager.IsMemoryDirty(data.upload_address, data.copy_size)) { | ||
| 65 | indirect_compute = {data.upload_address}; | ||
| 66 | } | ||
| 67 | } | ||
| 68 | uploads.clear(); | ||
| 54 | ProcessLaunch(); | 69 | ProcessLaunch(); |
| 70 | indirect_compute = std::nullopt; | ||
| 55 | break; | 71 | break; |
| 72 | } | ||
| 56 | default: | 73 | default: |
| 57 | break; | 74 | break; |
| 58 | } | 75 | } |
| @@ -62,6 +79,7 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun | |||
| 62 | u32 methods_pending) { | 79 | u32 methods_pending) { |
| 63 | switch (method) { | 80 | switch (method) { |
| 64 | case KEPLER_COMPUTE_REG_INDEX(data_upload): | 81 | case KEPLER_COMPUTE_REG_INDEX(data_upload): |
| 82 | upload_address = current_dma_segment; | ||
| 65 | upload_state.ProcessData(base_start, amount); | 83 | upload_state.ProcessData(base_start, amount); |
| 66 | return; | 84 | return; |
| 67 | default: | 85 | default: |
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 2092e685f..735e05fb4 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
| @@ -5,6 +5,7 @@ | |||
| 5 | 5 | ||
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <cstddef> | 7 | #include <cstddef> |
| 8 | #include <optional> | ||
| 8 | #include <vector> | 9 | #include <vector> |
| 9 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 10 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| @@ -36,6 +37,9 @@ namespace Tegra::Engines { | |||
| 36 | #define KEPLER_COMPUTE_REG_INDEX(field_name) \ | 37 | #define KEPLER_COMPUTE_REG_INDEX(field_name) \ |
| 37 | (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) | 38 | (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) |
| 38 | 39 | ||
| 40 | #define LAUNCH_REG_INDEX(field_name) \ | ||
| 41 | (offsetof(Tegra::Engines::KeplerCompute::LaunchParams, field_name) / sizeof(u32)) | ||
| 42 | |||
| 39 | class KeplerCompute final : public EngineInterface { | 43 | class KeplerCompute final : public EngineInterface { |
| 40 | public: | 44 | public: |
| 41 | explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); | 45 | explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); |
| @@ -201,6 +205,10 @@ public: | |||
| 201 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, | 205 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, |
| 202 | u32 methods_pending) override; | 206 | u32 methods_pending) override; |
| 203 | 207 | ||
| 208 | std::optional<GPUVAddr> GetIndirectComputeAddress() const { | ||
| 209 | return indirect_compute; | ||
| 210 | } | ||
| 211 | |||
| 204 | private: | 212 | private: |
| 205 | void ProcessLaunch(); | 213 | void ProcessLaunch(); |
| 206 | 214 | ||
| @@ -216,6 +224,15 @@ private: | |||
| 216 | MemoryManager& memory_manager; | 224 | MemoryManager& memory_manager; |
| 217 | VideoCore::RasterizerInterface* rasterizer = nullptr; | 225 | VideoCore::RasterizerInterface* rasterizer = nullptr; |
| 218 | Upload::State upload_state; | 226 | Upload::State upload_state; |
| 227 | GPUVAddr upload_address; | ||
| 228 | |||
| 229 | struct UploadInfo { | ||
| 230 | GPUVAddr upload_address; | ||
| 231 | GPUVAddr exec_address; | ||
| 232 | u32 copy_size; | ||
| 233 | }; | ||
| 234 | std::vector<UploadInfo> uploads; | ||
| 235 | std::optional<GPUVAddr> indirect_compute{}; | ||
| 219 | }; | 236 | }; |
| 220 | 237 | ||
| 221 | #define ASSERT_REG_POSITION(field_name, position) \ | 238 | #define ASSERT_REG_POSITION(field_name, position) \ |
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp
index 7718a09b3..6de2543b7 100644
--- a/src/video_core/engines/puller.cpp
+++ b/src/video_core/engines/puller.cpp
| @@ -34,19 +34,24 @@ void Puller::ProcessBindMethod(const MethodCall& method_call) { | |||
| 34 | bound_engines[method_call.subchannel] = engine_id; | 34 | bound_engines[method_call.subchannel] = engine_id; |
| 35 | switch (engine_id) { | 35 | switch (engine_id) { |
| 36 | case EngineID::FERMI_TWOD_A: | 36 | case EngineID::FERMI_TWOD_A: |
| 37 | dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel); | 37 | dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel, |
| 38 | EngineTypes::Fermi2D); | ||
| 38 | break; | 39 | break; |
| 39 | case EngineID::MAXWELL_B: | 40 | case EngineID::MAXWELL_B: |
| 40 | dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel); | 41 | dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel, |
| 42 | EngineTypes::Maxwell3D); | ||
| 41 | break; | 43 | break; |
| 42 | case EngineID::KEPLER_COMPUTE_B: | 44 | case EngineID::KEPLER_COMPUTE_B: |
| 43 | dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel); | 45 | dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel, |
| 46 | EngineTypes::KeplerCompute); | ||
| 44 | break; | 47 | break; |
| 45 | case EngineID::MAXWELL_DMA_COPY_A: | 48 | case EngineID::MAXWELL_DMA_COPY_A: |
| 46 | dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel); | 49 | dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel, |
| 50 | EngineTypes::MaxwellDMA); | ||
| 47 | break; | 51 | break; |
| 48 | case EngineID::KEPLER_INLINE_TO_MEMORY_B: | 52 | case EngineID::KEPLER_INLINE_TO_MEMORY_B: |
| 49 | dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel); | 53 | dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel, |
| 54 | EngineTypes::KeplerMemory); | ||
| 50 | break; | 55 | break; |
| 51 | default: | 56 | default: |
| 52 | UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id); | 57 | UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id); |