summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2020-04-28 02:14:11 -0300
committerGravatar ReinUsesLisp2020-04-28 02:18:12 -0300
commitfe931ac9761a813c8e7d195cf99bf68ff324839c (patch)
tree7a1b2a9c2a85ee0e93a19953f03b50530d085c5f /src
parentMerge pull request #3766 from ReinUsesLisp/renderpass-cache-key (diff)
downloadyuzu-fe931ac9761a813c8e7d195cf99bf68ff324839c.tar.gz
yuzu-fe931ac9761a813c8e7d195cf99bf68ff324839c.tar.xz
yuzu-fe931ac9761a813c8e7d195cf99bf68ff324839c.zip
{maxwell_3d,buffer_cache}: Implement memory barriers using 3D registers
Drop WriteBarrier from the buffer cache and use Maxwell3D's WaitForIdle register instead. To implement this on OpenGL we just call glMemoryBarrier with the necessary bits. Vulkan lacks this synchronization primitive, so we set an event and immediately wait for it. This is not a pretty solution, but it's what Vulkan can do without submitting the current command buffer to the queue (which ends up being more expensive on the CPU).
Diffstat (limited to 'src')
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h6
-rw-r--r--src/video_core/engines/maxwell_3d.cpp4
-rw-r--r--src/video_core/engines/maxwell_3d.h5
-rw-r--r--src/video_core/rasterizer_interface.h3
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp11
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp22
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h2
-rw-r--r--src/video_core/renderer_vulkan/wrapper.cpp1
-rw-r--r--src/video_core/renderer_vulkan/wrapper.h10
13 files changed, 57 insertions, 16 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 510f11089..c86e914be 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -89,10 +89,6 @@ public:
89 map->MarkAsWritten(true); 89 map->MarkAsWritten(true);
90 MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); 90 MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
91 } 91 }
92 } else {
93 if (map->IsWritten()) {
94 WriteBarrier();
95 }
96 } 92 }
97 93
98 return {ToHandle(block), static_cast<u64>(block->GetOffset(cpu_addr))}; 94 return {ToHandle(block), static_cast<u64>(block->GetOffset(cpu_addr))};
@@ -254,8 +250,6 @@ protected:
254 250
255 virtual BufferType ToHandle(const OwnerBuffer& storage) = 0; 251 virtual BufferType ToHandle(const OwnerBuffer& storage) = 0;
256 252
257 virtual void WriteBarrier() = 0;
258
259 virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0; 253 virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
260 254
261 virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size, 255 virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 39e3b66a2..7db055ea0 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -184,6 +184,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
184 } 184 }
185 185
186 switch (method) { 186 switch (method) {
187 case MAXWELL3D_REG_INDEX(wait_for_idle): {
188 rasterizer.WaitForIdle();
189 break;
190 }
187 case MAXWELL3D_REG_INDEX(shadow_ram_control): { 191 case MAXWELL3D_REG_INDEX(shadow_ram_control): {
188 shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(method_call.argument); 192 shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(method_call.argument);
189 break; 193 break;
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 3dfba8197..b026afabe 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -709,7 +709,9 @@ public:
709 709
710 union { 710 union {
711 struct { 711 struct {
712 INSERT_UNION_PADDING_WORDS(0x45); 712 INSERT_UNION_PADDING_WORDS(0x44);
713
714 u32 wait_for_idle;
713 715
714 struct { 716 struct {
715 u32 upload_address; 717 u32 upload_address;
@@ -1535,6 +1537,7 @@ private:
1535 static_assert(offsetof(Maxwell3D::Regs, field_name) == position * 4, \ 1537 static_assert(offsetof(Maxwell3D::Regs, field_name) == position * 4, \
1536 "Field " #field_name " has invalid position") 1538 "Field " #field_name " has invalid position")
1537 1539
1540ASSERT_REG_POSITION(wait_for_idle, 0x44);
1538ASSERT_REG_POSITION(macros, 0x45); 1541ASSERT_REG_POSITION(macros, 0x45);
1539ASSERT_REG_POSITION(shadow_ram_control, 0x49); 1542ASSERT_REG_POSITION(shadow_ram_control, 0x49);
1540ASSERT_REG_POSITION(upload, 0x60); 1543ASSERT_REG_POSITION(upload, 0x60);
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 603f61952..3cbdac8e7 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -80,6 +80,9 @@ public:
80 /// and invalidated 80 /// and invalidated
81 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; 81 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
82 82
83 /// Notify the host renderer to wait for previous primitive and compute operations.
84 virtual void WaitForIdle() = 0;
85
83 /// Notify the rasterizer to send all written commands to the host GPU. 86 /// Notify the rasterizer to send all written commands to the host GPU.
84 virtual void FlushCommands() = 0; 87 virtual void FlushCommands() = 0;
85 88
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 4efce0de7..d2cab50bd 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -51,10 +51,6 @@ Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
51 return std::make_shared<CachedBufferBlock>(cpu_addr, size); 51 return std::make_shared<CachedBufferBlock>(cpu_addr, size);
52} 52}
53 53
54void OGLBufferCache::WriteBarrier() {
55 glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
56}
57
58GLuint OGLBufferCache::ToHandle(const Buffer& buffer) { 54GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
59 return buffer->GetHandle(); 55 return buffer->GetHandle();
60} 56}
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index a74817857..a9e86cfc7 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -59,8 +59,6 @@ protected:
59 59
60 GLuint ToHandle(const Buffer& buffer) override; 60 GLuint ToHandle(const Buffer& buffer) override;
61 61
62 void WriteBarrier() override;
63
64 void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, 62 void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
65 const u8* data) override; 63 const u8* data) override;
66 64
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 6fe155bcc..0d07d2422 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -725,6 +725,17 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
725 InvalidateRegion(addr, size); 725 InvalidateRegion(addr, size);
726} 726}
727 727
728void RasterizerOpenGL::WaitForIdle() {
729 // Place a barrier on everything that is not framebuffer related.
730 // This is related to another flag that is not currently implemented.
731 glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT |
732 GL_UNIFORM_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT |
733 GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_COMMAND_BARRIER_BIT |
734 GL_PIXEL_BUFFER_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT |
735 GL_BUFFER_UPDATE_BARRIER_BIT | GL_TRANSFORM_FEEDBACK_BARRIER_BIT |
736 GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
737}
738
728void RasterizerOpenGL::FlushCommands() { 739void RasterizerOpenGL::FlushCommands() {
729 // Only flush when we have commands queued to OpenGL. 740 // Only flush when we have commands queued to OpenGL.
730 if (num_queued_commands == 0) { 741 if (num_queued_commands == 0) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index ebd2173eb..a95bc7be2 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -75,6 +75,7 @@ public:
75 void SignalSyncPoint(u32 value) override; 75 void SignalSyncPoint(u32 value) override;
76 void ReleaseFences() override; 76 void ReleaseFences() override;
77 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 77 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
78 void WaitForIdle() override;
78 void FlushCommands() override; 79 void FlushCommands() override;
79 void TickFrame() override; 80 void TickFrame() override;
80 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 81 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 3cd2e2774..c05c27b32 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -55,8 +55,6 @@ public:
55protected: 55protected:
56 VkBuffer ToHandle(const Buffer& buffer) override; 56 VkBuffer ToHandle(const Buffer& buffer) override;
57 57
58 void WriteBarrier() override {}
59
60 Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; 58 Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
61 59
62 void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, 60 void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index c821b1229..991ed4385 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -302,7 +302,7 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
302 buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), 302 buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
303 sampler_cache(device), 303 sampler_cache(device),
304 fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache), 304 fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
305 query_cache(system, *this, device, scheduler) { 305 query_cache(system, *this, device, scheduler), wfi_event{device.GetLogical().CreateEvent()} {
306 scheduler.SetQueryCache(query_cache); 306 scheduler.SetQueryCache(query_cache);
307} 307}
308 308
@@ -576,6 +576,26 @@ void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
576 InvalidateRegion(addr, size); 576 InvalidateRegion(addr, size);
577} 577}
578 578
579void RasterizerVulkan::WaitForIdle() {
580 // Everything but wait pixel operations. This intentionally includes FRAGMENT_SHADER_BIT because
581 // fragment shaders can still write storage buffers.
582 VkPipelineStageFlags flags =
583 VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
584 VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
585 VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
586 VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
587 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT;
588 if (device.IsExtTransformFeedbackSupported()) {
589 flags |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT;
590 }
591
592 scheduler.RequestOutsideRenderPassOperationContext();
593 scheduler.Record([event = *wfi_event, flags](vk::CommandBuffer cmdbuf) {
594 cmdbuf.SetEvent(event, flags);
595 cmdbuf.WaitEvents(event, flags, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, {}, {}, {});
596 });
597}
598
579void RasterizerVulkan::FlushCommands() { 599void RasterizerVulkan::FlushCommands() {
580 if (draw_counter > 0) { 600 if (draw_counter > 0) {
581 draw_counter = 0; 601 draw_counter = 0;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index d41a7929e..4f78bbd50 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -127,6 +127,7 @@ public:
127 void SignalSyncPoint(u32 value) override; 127 void SignalSyncPoint(u32 value) override;
128 void ReleaseFences() override; 128 void ReleaseFences() override;
129 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 129 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
130 void WaitForIdle() override;
130 void FlushCommands() override; 131 void FlushCommands() override;
131 void TickFrame() override; 132 void TickFrame() override;
132 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 133 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
@@ -276,6 +277,7 @@ private:
276 277
277 vk::Buffer default_buffer; 278 vk::Buffer default_buffer;
278 VKMemoryCommit default_buffer_commit; 279 VKMemoryCommit default_buffer_commit;
280 vk::Event wfi_event;
279 281
280 std::array<View, Maxwell::NumRenderTargets> color_attachments; 282 std::array<View, Maxwell::NumRenderTargets> color_attachments;
281 View zeta_attachment; 283 View zeta_attachment;
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp
index 7f5bc1404..2ce9b0626 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -87,6 +87,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
87 X(vkCmdSetStencilReference); 87 X(vkCmdSetStencilReference);
88 X(vkCmdSetStencilWriteMask); 88 X(vkCmdSetStencilWriteMask);
89 X(vkCmdSetViewport); 89 X(vkCmdSetViewport);
90 X(vkCmdWaitEvents);
90 X(vkCreateBuffer); 91 X(vkCreateBuffer);
91 X(vkCreateBufferView); 92 X(vkCreateBufferView);
92 X(vkCreateCommandPool); 93 X(vkCreateCommandPool);
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h
index bda16a2cb..98937a77a 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -205,6 +205,7 @@ struct DeviceDispatch : public InstanceDispatch {
205 PFN_vkCmdSetStencilReference vkCmdSetStencilReference; 205 PFN_vkCmdSetStencilReference vkCmdSetStencilReference;
206 PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask; 206 PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask;
207 PFN_vkCmdSetViewport vkCmdSetViewport; 207 PFN_vkCmdSetViewport vkCmdSetViewport;
208 PFN_vkCmdWaitEvents vkCmdWaitEvents;
208 PFN_vkCreateBuffer vkCreateBuffer; 209 PFN_vkCreateBuffer vkCreateBuffer;
209 PFN_vkCreateBufferView vkCreateBufferView; 210 PFN_vkCreateBufferView vkCreateBufferView;
210 PFN_vkCreateCommandPool vkCreateCommandPool; 211 PFN_vkCreateCommandPool vkCreateCommandPool;
@@ -958,6 +959,15 @@ public:
958 dld->vkCmdSetEvent(handle, event, stage_flags); 959 dld->vkCmdSetEvent(handle, event, stage_flags);
959 } 960 }
960 961
962 void WaitEvents(Span<VkEvent> events, VkPipelineStageFlags src_stage_mask,
963 VkPipelineStageFlags dst_stage_mask, Span<VkMemoryBarrier> memory_barriers,
964 Span<VkBufferMemoryBarrier> buffer_barriers,
965 Span<VkImageMemoryBarrier> image_barriers) const noexcept {
966 dld->vkCmdWaitEvents(handle, events.size(), events.data(), src_stage_mask, dst_stage_mask,
967 memory_barriers.size(), memory_barriers.data(), buffer_barriers.size(),
968 buffer_barriers.data(), image_barriers.size(), image_barriers.data());
969 }
970
961 void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, 971 void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers,
962 const VkDeviceSize* offsets, 972 const VkDeviceSize* offsets,
963 const VkDeviceSize* sizes) const noexcept { 973 const VkDeviceSize* sizes) const noexcept {