summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Fernando Sahmkow, 2023-08-27 02:58:00 +0200
committer: Fernando Sahmkow, 2023-08-27 04:26:22 +0200
commit: 115792158d3ac4ca746d1775f2381e8f8dd18582 (patch)
tree: fec8995dd2a887068625e9d1278d0562bee6a8cb /src
parent: Shader Recompiler: Auto stub special registers and dump pipelines on exception. (diff)
download: yuzu-115792158d3ac4ca746d1775f2381e8f8dd18582.tar.gz
yuzu-115792158d3ac4ca746d1775f2381e8f8dd18582.tar.xz
yuzu-115792158d3ac4ca746d1775f2381e8f8dd18582.zip
VideoCore: Implement DispatchIndirect
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/dma_pusher.cpp 28
-rw-r--r-- src/video_core/dma_pusher.h 5
-rw-r--r-- src/video_core/engines/engine_interface.h 8
-rw-r--r-- src/video_core/engines/engine_upload.h 8
-rw-r--r-- src/video_core/engines/kepler_compute.cpp 20
-rw-r--r-- src/video_core/engines/kepler_compute.h 17
-rw-r--r-- src/video_core/engines/puller.cpp 15
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 11
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.cpp 14
-rw-r--r-- src/video_core/vulkan_common/vulkan_wrapper.cpp 1
-rw-r--r-- src/video_core/vulkan_common/vulkan_wrapper.h 5
11 files changed, 119 insertions, 13 deletions
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index ab28951b6..58ce0d8c2 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -14,6 +14,7 @@
14namespace Tegra { 14namespace Tegra {
15 15
16constexpr u32 MacroRegistersStart = 0xE00; 16constexpr u32 MacroRegistersStart = 0xE00;
17constexpr u32 ComputeInline = 0x6D;
17 18
18DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_, 19DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_,
19 Control::ChannelState& channel_state_) 20 Control::ChannelState& channel_state_)
@@ -83,20 +84,35 @@ bool DmaPusher::Step() {
83 dma_state.dma_get, command_list_header.size * sizeof(u32)); 84 dma_state.dma_get, command_list_header.size * sizeof(u32));
84 } 85 }
85 } 86 }
86 if (Settings::IsGPULevelHigh() && dma_state.method < MacroRegistersStart) { 87 const auto safe_process = [&] {
87 Core::Memory::GpuGuestMemory<Tegra::CommandHeader, 88 Core::Memory::GpuGuestMemory<Tegra::CommandHeader,
88 Core::Memory::GuestMemoryFlags::SafeRead> 89 Core::Memory::GuestMemoryFlags::SafeRead>
89 headers(memory_manager, dma_state.dma_get, command_list_header.size, 90 headers(memory_manager, dma_state.dma_get, command_list_header.size,
90 &command_headers); 91 &command_headers);
91 ProcessCommands(headers); 92 ProcessCommands(headers);
93 };
94 const auto unsafe_process = [&] {
95 Core::Memory::GpuGuestMemory<Tegra::CommandHeader,
96 Core::Memory::GuestMemoryFlags::UnsafeRead>
97 headers(memory_manager, dma_state.dma_get, command_list_header.size,
98 &command_headers);
99 ProcessCommands(headers);
100 };
101 if (Settings::IsGPULevelHigh()) {
102 if (dma_state.method >= MacroRegistersStart) {
103 unsafe_process();
104 return true;
105 }
106 if (subchannel_type[dma_state.subchannel] == Engines::EngineTypes::KeplerCompute &&
107 dma_state.method == ComputeInline) {
108 unsafe_process();
109 return true;
110 }
111 safe_process();
92 return true; 112 return true;
93 } 113 }
94 Core::Memory::GpuGuestMemory<Tegra::CommandHeader, 114 unsafe_process();
95 Core::Memory::GuestMemoryFlags::UnsafeRead>
96 headers(memory_manager, dma_state.dma_get, command_list_header.size, &command_headers);
97 ProcessCommands(headers);
98 } 115 }
99
100 return true; 116 return true;
101} 117}
102 118
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 8a2784cdc..c9fab2d90 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -130,8 +130,10 @@ public:
130 130
131 void DispatchCalls(); 131 void DispatchCalls();
132 132
133 void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id) { 133 void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id,
134 Engines::EngineTypes engine_type) {
134 subchannels[subchannel_id] = engine; 135 subchannels[subchannel_id] = engine;
136 subchannel_type[subchannel_id] = engine_type;
135 } 137 }
136 138
137 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); 139 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
@@ -170,6 +172,7 @@ private:
170 const bool ib_enable{true}; ///< IB mode enabled 172 const bool ib_enable{true}; ///< IB mode enabled
171 173
172 std::array<Engines::EngineInterface*, max_subchannels> subchannels{}; 174 std::array<Engines::EngineInterface*, max_subchannels> subchannels{};
175 std::array<Engines::EngineTypes, max_subchannels> subchannel_type;
173 176
174 GPU& gpu; 177 GPU& gpu;
175 Core::System& system; 178 Core::System& system;
diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h
index 392322358..54631ee6c 100644
--- a/src/video_core/engines/engine_interface.h
+++ b/src/video_core/engines/engine_interface.h
@@ -11,6 +11,14 @@
11 11
12namespace Tegra::Engines { 12namespace Tegra::Engines {
13 13
14enum class EngineTypes : u32 {
15 KeplerCompute,
16 Maxwell3D,
17 Fermi2D,
18 MaxwellDMA,
19 KeplerMemory,
20};
21
14class EngineInterface { 22class EngineInterface {
15public: 23public:
16 virtual ~EngineInterface() = default; 24 virtual ~EngineInterface() = default;
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
index 7242d2529..21bf8aeb4 100644
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
@@ -69,6 +69,14 @@ public:
69 /// Binds a rasterizer to this engine. 69 /// Binds a rasterizer to this engine.
70 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); 70 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
71 71
72 GPUVAddr ExecTargetAddress() const {
73 return regs.dest.Address();
74 }
75
76 u32 GetUploadSize() const {
77 return copy_size;
78 }
79
72private: 80private:
73 void ProcessData(std::span<const u8> read_buffer); 81 void ProcessData(std::span<const u8> read_buffer);
74 82
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index a38d9528a..cd61ab222 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -43,16 +43,33 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal
43 43
44 switch (method) { 44 switch (method) {
45 case KEPLER_COMPUTE_REG_INDEX(exec_upload): { 45 case KEPLER_COMPUTE_REG_INDEX(exec_upload): {
46 UploadInfo info{.upload_address = upload_address,
47 .exec_address = upload_state.ExecTargetAddress(),
48 .copy_size = upload_state.GetUploadSize()};
49 uploads.push_back(info);
46 upload_state.ProcessExec(regs.exec_upload.linear != 0); 50 upload_state.ProcessExec(regs.exec_upload.linear != 0);
47 break; 51 break;
48 } 52 }
49 case KEPLER_COMPUTE_REG_INDEX(data_upload): { 53 case KEPLER_COMPUTE_REG_INDEX(data_upload): {
54 upload_address = current_dma_segment;
50 upload_state.ProcessData(method_argument, is_last_call); 55 upload_state.ProcessData(method_argument, is_last_call);
51 break; 56 break;
52 } 57 }
53 case KEPLER_COMPUTE_REG_INDEX(launch): 58 case KEPLER_COMPUTE_REG_INDEX(launch): {
59 const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
60
61 for (auto& data : uploads) {
62 const GPUVAddr offset = data.exec_address - launch_desc_loc;
63 if (offset / sizeof(u32) == LAUNCH_REG_INDEX(grid_dim_x) &&
64 memory_manager.IsMemoryDirty(data.upload_address, data.copy_size)) {
65 indirect_compute = {data.upload_address};
66 }
67 }
68 uploads.clear();
54 ProcessLaunch(); 69 ProcessLaunch();
70 indirect_compute = std::nullopt;
55 break; 71 break;
72 }
56 default: 73 default:
57 break; 74 break;
58 } 75 }
@@ -62,6 +79,7 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun
62 u32 methods_pending) { 79 u32 methods_pending) {
63 switch (method) { 80 switch (method) {
64 case KEPLER_COMPUTE_REG_INDEX(data_upload): 81 case KEPLER_COMPUTE_REG_INDEX(data_upload):
82 upload_address = current_dma_segment;
65 upload_state.ProcessData(base_start, amount); 83 upload_state.ProcessData(base_start, amount);
66 return; 84 return;
67 default: 85 default:
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 2092e685f..735e05fb4 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -5,6 +5,7 @@
5 5
6#include <array> 6#include <array>
7#include <cstddef> 7#include <cstddef>
8#include <optional>
8#include <vector> 9#include <vector>
9#include "common/bit_field.h" 10#include "common/bit_field.h"
10#include "common/common_funcs.h" 11#include "common/common_funcs.h"
@@ -36,6 +37,9 @@ namespace Tegra::Engines {
36#define KEPLER_COMPUTE_REG_INDEX(field_name) \ 37#define KEPLER_COMPUTE_REG_INDEX(field_name) \
37 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) 38 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
38 39
40#define LAUNCH_REG_INDEX(field_name) \
41 (offsetof(Tegra::Engines::KeplerCompute::LaunchParams, field_name) / sizeof(u32))
42
39class KeplerCompute final : public EngineInterface { 43class KeplerCompute final : public EngineInterface {
40public: 44public:
41 explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); 45 explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager);
@@ -201,6 +205,10 @@ public:
201 void CallMultiMethod(u32 method, const u32* base_start, u32 amount, 205 void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
202 u32 methods_pending) override; 206 u32 methods_pending) override;
203 207
208 std::optional<GPUVAddr> GetIndirectComputeAddress() const {
209 return indirect_compute;
210 }
211
204private: 212private:
205 void ProcessLaunch(); 213 void ProcessLaunch();
206 214
@@ -216,6 +224,15 @@ private:
216 MemoryManager& memory_manager; 224 MemoryManager& memory_manager;
217 VideoCore::RasterizerInterface* rasterizer = nullptr; 225 VideoCore::RasterizerInterface* rasterizer = nullptr;
218 Upload::State upload_state; 226 Upload::State upload_state;
227 GPUVAddr upload_address;
228
229 struct UploadInfo {
230 GPUVAddr upload_address;
231 GPUVAddr exec_address;
232 u32 copy_size;
233 };
234 std::vector<UploadInfo> uploads;
235 std::optional<GPUVAddr> indirect_compute{};
219}; 236};
220 237
221#define ASSERT_REG_POSITION(field_name, position) \ 238#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp
index 7718a09b3..6de2543b7 100644
--- a/src/video_core/engines/puller.cpp
+++ b/src/video_core/engines/puller.cpp
@@ -34,19 +34,24 @@ void Puller::ProcessBindMethod(const MethodCall& method_call) {
34 bound_engines[method_call.subchannel] = engine_id; 34 bound_engines[method_call.subchannel] = engine_id;
35 switch (engine_id) { 35 switch (engine_id) {
36 case EngineID::FERMI_TWOD_A: 36 case EngineID::FERMI_TWOD_A:
37 dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel); 37 dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel,
38 EngineTypes::Fermi2D);
38 break; 39 break;
39 case EngineID::MAXWELL_B: 40 case EngineID::MAXWELL_B:
40 dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel); 41 dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel,
42 EngineTypes::Maxwell3D);
41 break; 43 break;
42 case EngineID::KEPLER_COMPUTE_B: 44 case EngineID::KEPLER_COMPUTE_B:
43 dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel); 45 dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel,
46 EngineTypes::KeplerCompute);
44 break; 47 break;
45 case EngineID::MAXWELL_DMA_COPY_A: 48 case EngineID::MAXWELL_DMA_COPY_A:
46 dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel); 49 dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel,
50 EngineTypes::MaxwellDMA);
47 break; 51 break;
48 case EngineID::KEPLER_INLINE_TO_MEMORY_B: 52 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
49 dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel); 53 dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel,
54 EngineTypes::KeplerMemory);
50 break; 55 break;
51 default: 56 default:
52 UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id); 57 UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 1ba31be88..dd03efecd 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -380,6 +380,17 @@ void RasterizerOpenGL::DispatchCompute() {
380 pipeline->SetEngine(kepler_compute, gpu_memory); 380 pipeline->SetEngine(kepler_compute, gpu_memory);
381 pipeline->Configure(); 381 pipeline->Configure();
382 const auto& qmd{kepler_compute->launch_description}; 382 const auto& qmd{kepler_compute->launch_description};
383 auto indirect_address = kepler_compute->GetIndirectComputeAddress();
384 if (indirect_address) {
385 // DispatchIndirect
386 static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
387 const auto post_op = VideoCommon::ObtainBufferOperation::DiscardWrite;
388 const auto [buffer, offset] =
389 buffer_cache.ObtainBuffer(*indirect_address, 12, sync_info, post_op);
390 glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer->Handle());
391 glDispatchComputeIndirect(static_cast<GLintptr>(offset));
392 return;
393 }
383 glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); 394 glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z);
384 ++num_queued_commands; 395 ++num_queued_commands;
385 has_written_global_memory |= pipeline->WritesGlobalMemory(); 396 has_written_global_memory |= pipeline->WritesGlobalMemory();
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 032f694bc..01e76a82c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -463,6 +463,20 @@ void RasterizerVulkan::DispatchCompute() {
463 pipeline->Configure(*kepler_compute, *gpu_memory, scheduler, buffer_cache, texture_cache); 463 pipeline->Configure(*kepler_compute, *gpu_memory, scheduler, buffer_cache, texture_cache);
464 464
465 const auto& qmd{kepler_compute->launch_description}; 465 const auto& qmd{kepler_compute->launch_description};
466 auto indirect_address = kepler_compute->GetIndirectComputeAddress();
467 if (indirect_address) {
468 // DispatchIndirect
469 static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
470 const auto post_op = VideoCommon::ObtainBufferOperation::DiscardWrite;
471 const auto [buffer, offset] =
472 buffer_cache.ObtainBuffer(*indirect_address, 12, sync_info, post_op);
473 scheduler.RequestOutsideRenderPassOperationContext();
474 scheduler.Record([indirect_buffer = buffer->Handle(),
475 indirect_offset = offset](vk::CommandBuffer cmdbuf) {
476 cmdbuf.DispatchIndirect(indirect_buffer, indirect_offset);
477 });
478 return;
479 }
466 const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; 480 const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
467 scheduler.RequestOutsideRenderPassOperationContext(); 481 scheduler.RequestOutsideRenderPassOperationContext();
468 scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); 482 scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); });
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 78e5a248f..c3f388d89 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -92,6 +92,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
92 X(vkCmdCopyImage); 92 X(vkCmdCopyImage);
93 X(vkCmdCopyImageToBuffer); 93 X(vkCmdCopyImageToBuffer);
94 X(vkCmdDispatch); 94 X(vkCmdDispatch);
95 X(vkCmdDispatchIndirect);
95 X(vkCmdDraw); 96 X(vkCmdDraw);
96 X(vkCmdDrawIndexed); 97 X(vkCmdDrawIndexed);
97 X(vkCmdDrawIndirect); 98 X(vkCmdDrawIndirect);
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index c226a2a29..049fa8038 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -203,6 +203,7 @@ struct DeviceDispatch : InstanceDispatch {
203 PFN_vkCmdCopyImage vkCmdCopyImage{}; 203 PFN_vkCmdCopyImage vkCmdCopyImage{};
204 PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer{}; 204 PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer{};
205 PFN_vkCmdDispatch vkCmdDispatch{}; 205 PFN_vkCmdDispatch vkCmdDispatch{};
206 PFN_vkCmdDispatchIndirect vkCmdDispatchIndirect{};
206 PFN_vkCmdDraw vkCmdDraw{}; 207 PFN_vkCmdDraw vkCmdDraw{};
207 PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; 208 PFN_vkCmdDrawIndexed vkCmdDrawIndexed{};
208 PFN_vkCmdDrawIndirect vkCmdDrawIndirect{}; 209 PFN_vkCmdDrawIndirect vkCmdDrawIndirect{};
@@ -1209,6 +1210,10 @@ public:
1209 dld->vkCmdDispatch(handle, x, y, z); 1210 dld->vkCmdDispatch(handle, x, y, z);
1210 } 1211 }
1211 1212
1213 void DispatchIndirect(VkBuffer indirect_buffer, VkDeviceSize offset) const noexcept {
1214 dld->vkCmdDispatchIndirect(handle, indirect_buffer, offset);
1215 }
1216
1212 void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, 1217 void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
1213 VkDependencyFlags dependency_flags, Span<VkMemoryBarrier> memory_barriers, 1218 VkDependencyFlags dependency_flags, Span<VkMemoryBarrier> memory_barriers,
1214 Span<VkBufferMemoryBarrier> buffer_barriers, 1219 Span<VkBufferMemoryBarrier> buffer_barriers,