Diffstat (limited to 'src')
-rw-r--r--  src/video_core/CMakeLists.txt                      |    2
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.h   |   72
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp   | 1141
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h     |  252
4 files changed, 1466 insertions(+), 1 deletion(-)
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 729ee4a01..12c46e86f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -153,6 +153,7 @@ if (ENABLE_VULKAN)
153 renderer_vulkan/fixed_pipeline_state.h
154 renderer_vulkan/maxwell_to_vk.cpp
155 renderer_vulkan/maxwell_to_vk.h
156 renderer_vulkan/renderer_vulkan.h
157 renderer_vulkan/vk_buffer_cache.cpp
158 renderer_vulkan/vk_buffer_cache.h
159 renderer_vulkan/vk_compute_pass.cpp
@@ -171,6 +172,7 @@ if (ENABLE_VULKAN)
172 renderer_vulkan/vk_memory_manager.h
173 renderer_vulkan/vk_pipeline_cache.cpp
174 renderer_vulkan/vk_pipeline_cache.h
175 renderer_vulkan/vk_rasterizer.cpp
176 renderer_vulkan/vk_rasterizer.h
177 renderer_vulkan/vk_renderpass_cache.cpp
178 renderer_vulkan/vk_renderpass_cache.h
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
new file mode 100644
index 000000000..a472c5dc9
--- /dev/null
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -0,0 +1,72 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <vector>
9#include "video_core/renderer_base.h"
10#include "video_core/renderer_vulkan/declarations.h"
11
12namespace Core {
13class System;
14}
15
16namespace Vulkan {
17
18class VKBlitScreen;
19class VKDevice;
20class VKFence;
21class VKMemoryManager;
22class VKResourceManager;
23class VKSwapchain;
24class VKScheduler;
25class VKImage;
26
27struct VKScreenInfo {
28 VKImage* image{};
29 u32 width{};
30 u32 height{};
31 bool is_srgb{};
32};
33
34class RendererVulkan final : public VideoCore::RendererBase {
35public:
36 explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system);
37 ~RendererVulkan() override;
38
39 /// Swap buffers (render frame)
40 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
41
42 /// Initialize the renderer
43 bool Init() override;
44
45 /// Shutdown the renderer
46 void ShutDown() override;
47
48private:
49 std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback(
50 const vk::DispatchLoaderDynamic& dldi);
51
52 bool PickDevices(const vk::DispatchLoaderDynamic& dldi);
53
54 void Report() const;
55
56 Core::System& system;
57
58 vk::Instance instance;
59 vk::SurfaceKHR surface;
60
61 VKScreenInfo screen_info;
62
63 UniqueDebugUtilsMessengerEXT debug_callback;
64 std::unique_ptr<VKDevice> device;
65 std::unique_ptr<VKSwapchain> swapchain;
66 std::unique_ptr<VKMemoryManager> memory_manager;
67 std::unique_ptr<VKResourceManager> resource_manager;
68 std::unique_ptr<VKScheduler> scheduler;
69 std::unique_ptr<VKBlitScreen> blit_screen;
70};
71
72} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
new file mode 100644
index 000000000..d2c6b1189
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -0,0 +1,1141 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <memory>
8#include <mutex>
9#include <vector>
10
11#include <boost/container/static_vector.hpp>
12#include <boost/functional/hash.hpp>
13
14#include "common/alignment.h"
15#include "common/assert.h"
16#include "common/logging/log.h"
17#include "common/microprofile.h"
18#include "core/core.h"
19#include "core/memory.h"
20#include "video_core/engines/kepler_compute.h"
21#include "video_core/engines/maxwell_3d.h"
22#include "video_core/renderer_vulkan/declarations.h"
23#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
24#include "video_core/renderer_vulkan/maxwell_to_vk.h"
25#include "video_core/renderer_vulkan/renderer_vulkan.h"
26#include "video_core/renderer_vulkan/vk_buffer_cache.h"
27#include "video_core/renderer_vulkan/vk_compute_pass.h"
28#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
29#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
30#include "video_core/renderer_vulkan/vk_device.h"
31#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
32#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
33#include "video_core/renderer_vulkan/vk_rasterizer.h"
34#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
35#include "video_core/renderer_vulkan/vk_resource_manager.h"
36#include "video_core/renderer_vulkan/vk_sampler_cache.h"
37#include "video_core/renderer_vulkan/vk_scheduler.h"
38#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
39#include "video_core/renderer_vulkan/vk_texture_cache.h"
40#include "video_core/renderer_vulkan/vk_update_descriptor.h"
41
42namespace Vulkan {
43
44using Maxwell = Tegra::Engines::Maxwell3D::Regs;
45
46MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192));
47MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128));
48MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128));
49MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128));
50MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128));
51MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128));
52MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128));
53MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128));
54MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128));
55MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128));
56MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128));
57
58namespace {
59
60constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::ShaderType::Compute);
61
62vk::Viewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) {
63 const auto& viewport = regs.viewport_transform[index];
64 const float x = viewport.translate_x - viewport.scale_x;
65 const float y = viewport.translate_y - viewport.scale_y;
66 const float width = viewport.scale_x * 2.0f;
67 const float height = viewport.scale_y * 2.0f;
68
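    // reduce_z is 1.0f when the guest uses a [-1, 1] depth range, extending the near plane so
    // the Vulkan viewport covers the same depth interval as the guest transform.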
69 const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
70 float near = viewport.translate_z - viewport.scale_z * reduce_z;
71 float far = viewport.translate_z + viewport.scale_z;
72 if (!device.IsExtDepthRangeUnrestrictedSupported()) {
73 near = std::clamp(near, 0.0f, 1.0f);
74 far = std::clamp(far, 0.0f, 1.0f);
75 }
76
77 return vk::Viewport(x, y, width != 0 ? width : 1.0f, height != 0 ? height : 1.0f, near, far);
78}
79
80constexpr vk::Rect2D GetScissorState(const Maxwell& regs, std::size_t index) {
81 const auto& scissor = regs.scissor_test[index];
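    // A disabled scissor test is emulated with a scissor rectangle that covers everything.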
82 if (!scissor.enable) {
83 return {{0, 0}, {INT32_MAX, INT32_MAX}};
84 }
85 const u32 width = scissor.max_x - scissor.min_x;
86 const u32 height = scissor.max_y - scissor.min_y;
87 return {{static_cast<s32>(scissor.min_x), static_cast<s32>(scissor.min_y)}, {width, height}};
88}
89
90std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
91 const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
92 std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
93 for (std::size_t i = 0; i < std::size(addresses); ++i) {
94 addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
95 }
96 return addresses;
97}
98
99void TransitionImages(const std::vector<ImageView>& views, vk::PipelineStageFlags pipeline_stage,
100 vk::AccessFlags access) {
101 for (auto& [view, layout] : views) {
102 view->Transition(*layout, pipeline_stage, access);
103 }
104}
105
106template <typename Engine, typename Entry>
107Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
108 std::size_t stage) {
109 const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
110 if (entry.IsBindless()) {
111 const Tegra::Texture::TextureHandle tex_handle =
112 engine.AccessConstBuffer32(stage_type, entry.GetBuffer(), entry.GetOffset());
113 return engine.GetTextureInfo(tex_handle);
114 }
115 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
116 return engine.GetStageTexture(stage_type, entry.GetOffset());
117 } else {
118 return engine.GetTexture(entry.GetOffset());
119 }
120}
121
122} // Anonymous namespace
123
124class BufferBindings final {
125public:
126 void AddVertexBinding(const vk::Buffer* buffer, vk::DeviceSize offset) {
127 vertex.buffer_ptrs[vertex.num_buffers] = buffer;
128 vertex.offsets[vertex.num_buffers] = offset;
129 ++vertex.num_buffers;
130 }
131
132 void SetIndexBinding(const vk::Buffer* buffer, vk::DeviceSize offset, vk::IndexType type) {
133 index.buffer = buffer;
134 index.offset = offset;
135 index.type = type;
136 }
137
138 void Bind(VKScheduler& scheduler) const {
 139 // Use this large switch case to avoid capturing more memory in the record lambda than
 140 // what we need. It looks horrible, but it's the best we can do in standard C++.
141 switch (vertex.num_buffers) {
142 case 0:
143 return BindStatic<0>(scheduler);
144 case 1:
145 return BindStatic<1>(scheduler);
146 case 2:
147 return BindStatic<2>(scheduler);
148 case 3:
149 return BindStatic<3>(scheduler);
150 case 4:
151 return BindStatic<4>(scheduler);
152 case 5:
153 return BindStatic<5>(scheduler);
154 case 6:
155 return BindStatic<6>(scheduler);
156 case 7:
157 return BindStatic<7>(scheduler);
158 case 8:
159 return BindStatic<8>(scheduler);
160 case 9:
161 return BindStatic<9>(scheduler);
162 case 10:
163 return BindStatic<10>(scheduler);
164 case 11:
165 return BindStatic<11>(scheduler);
166 case 12:
167 return BindStatic<12>(scheduler);
168 case 13:
169 return BindStatic<13>(scheduler);
170 case 14:
171 return BindStatic<14>(scheduler);
172 case 15:
173 return BindStatic<15>(scheduler);
174 case 16:
175 return BindStatic<16>(scheduler);
176 case 17:
177 return BindStatic<17>(scheduler);
178 case 18:
179 return BindStatic<18>(scheduler);
180 case 19:
181 return BindStatic<19>(scheduler);
182 case 20:
183 return BindStatic<20>(scheduler);
184 case 21:
185 return BindStatic<21>(scheduler);
186 case 22:
187 return BindStatic<22>(scheduler);
188 case 23:
189 return BindStatic<23>(scheduler);
190 case 24:
191 return BindStatic<24>(scheduler);
192 case 25:
193 return BindStatic<25>(scheduler);
194 case 26:
195 return BindStatic<26>(scheduler);
196 case 27:
197 return BindStatic<27>(scheduler);
198 case 28:
199 return BindStatic<28>(scheduler);
200 case 29:
201 return BindStatic<29>(scheduler);
202 case 30:
203 return BindStatic<30>(scheduler);
204 case 31:
205 return BindStatic<31>(scheduler);
206 case 32:
207 return BindStatic<32>(scheduler);
208 }
209 UNREACHABLE();
210 }
211
212private:
213 // Some of these fields are intentionally left uninitialized to avoid initializing them twice.
214 struct {
215 std::size_t num_buffers = 0;
216 std::array<const vk::Buffer*, Maxwell::NumVertexArrays> buffer_ptrs;
217 std::array<vk::DeviceSize, Maxwell::NumVertexArrays> offsets;
218 } vertex;
219
220 struct {
221 const vk::Buffer* buffer = nullptr;
222 vk::DeviceSize offset;
223 vk::IndexType type;
224 } index;
225
226 template <std::size_t N>
227 void BindStatic(VKScheduler& scheduler) const {
228 if (index.buffer != nullptr) {
229 BindStatic<N, true>(scheduler);
230 } else {
231 BindStatic<N, false>(scheduler);
232 }
233 }
234
235 template <std::size_t N, bool is_indexed>
236 void BindStatic(VKScheduler& scheduler) const {
237 static_assert(N <= Maxwell::NumVertexArrays);
238 if constexpr (N == 0) {
239 return;
240 }
241
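        // Copy the buffer handles and offsets by value so the deferred record lambda does not
        // depend on this object's lifetime.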
242 std::array<vk::Buffer, N> buffers;
243 std::transform(vertex.buffer_ptrs.begin(), vertex.buffer_ptrs.begin() + N, buffers.begin(),
244 [](const auto ptr) { return *ptr; });
245
246 std::array<vk::DeviceSize, N> offsets;
247 std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin());
248
249 if constexpr (is_indexed) {
250 // Indexed draw
251 scheduler.Record([buffers, offsets, index_buffer = *index.buffer,
252 index_offset = index.offset,
253 index_type = index.type](auto cmdbuf, auto& dld) {
254 cmdbuf.bindIndexBuffer(index_buffer, index_offset, index_type, dld);
255 cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(),
256 dld);
257 });
258 } else {
259 // Array draw
260 scheduler.Record([buffers, offsets](auto cmdbuf, auto& dld) {
261 cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(),
262 dld);
263 });
264 }
265 }
266};
267
268void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf,
269 const vk::DispatchLoaderDynamic& dld) const {
270 if (is_indexed) {
271 cmdbuf.drawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance, dld);
272 } else {
273 cmdbuf.draw(num_vertices, num_instances, base_vertex, base_instance, dld);
274 }
275}
276
277RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer,
278 VKScreenInfo& screen_info, const VKDevice& device,
279 VKResourceManager& resource_manager,
280 VKMemoryManager& memory_manager, VKScheduler& scheduler)
281 : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer},
282 screen_info{screen_info}, device{device}, resource_manager{resource_manager},
283 memory_manager{memory_manager}, scheduler{scheduler},
284 staging_pool(device, memory_manager, scheduler), descriptor_pool(device),
285 update_descriptor_queue(device, scheduler),
286 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
287 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
288 texture_cache(system, *this, device, resource_manager, memory_manager, scheduler,
289 staging_pool),
290 pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue),
291 buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
292 sampler_cache(device) {}
293
294RasterizerVulkan::~RasterizerVulkan() = default;
295
296bool RasterizerVulkan::DrawBatch(bool is_indexed) {
297 Draw(is_indexed, false);
298 return true;
299}
300
301bool RasterizerVulkan::DrawMultiBatch(bool is_indexed) {
302 Draw(is_indexed, true);
303 return true;
304}
305
306void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
307 MICROPROFILE_SCOPE(Vulkan_Drawing);
308
309 FlushWork();
310
311 const auto& gpu = system.GPU().Maxwell3D();
312 GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)};
313
314 buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));
315
316 BufferBindings buffer_bindings;
317 const DrawParameters draw_params =
318 SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced);
319
320 update_descriptor_queue.Acquire();
321 sampled_views.clear();
322 image_views.clear();
323
324 const auto shaders = pipeline_cache.GetShaders();
325 key.shaders = GetShaderAddresses(shaders);
326 SetupShaderDescriptors(shaders);
327
328 buffer_cache.Unmap();
329
330 const auto texceptions = UpdateAttachments();
331 SetupImageTransitions(texceptions, color_attachments, zeta_attachment);
332
333 key.renderpass_params = GetRenderPassParams(texceptions);
334
335 auto& pipeline = pipeline_cache.GetGraphicsPipeline(key);
336 scheduler.BindGraphicsPipeline(pipeline.GetHandle());
337
338 const auto renderpass = pipeline.GetRenderPass();
339 const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
340 scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr});
341
342 UpdateDynamicStates();
343
344 buffer_bindings.Bind(scheduler);
345
346 if (device.IsNvDeviceDiagnosticCheckpoints()) {
347 scheduler.Record(
348 [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); });
349 }
350
351 const auto pipeline_layout = pipeline.GetLayout();
352 const auto descriptor_set = pipeline.CommitDescriptorSet();
353 scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) {
354 if (descriptor_set) {
355 cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipeline_layout,
356 DESCRIPTOR_SET, 1, &descriptor_set, 0, nullptr, dld);
357 }
358 draw_params.Draw(cmdbuf, dld);
359 });
360}
361
362void RasterizerVulkan::Clear() {
363 MICROPROFILE_SCOPE(Vulkan_Clearing);
364
 365 const auto& gpu = system.GPU().Maxwell3D();
 366 if (!gpu.ShouldExecute()) {
367 return;
368 }
369
370 const auto& regs = gpu.regs;
371 const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
372 regs.clear_buffers.A;
373 const bool use_depth = regs.clear_buffers.Z;
374 const bool use_stencil = regs.clear_buffers.S;
375 if (!use_color && !use_depth && !use_stencil) {
376 return;
377 }
 378 // Clearing images requires being outside of a renderpass
379 scheduler.RequestOutsideRenderPassOperationContext();
380
 381 // TODO(Rodrigo): Implement clears by rendering a quad or by beginning a renderpass.
382
383 if (use_color) {
384 View color_view;
385 {
386 MICROPROFILE_SCOPE(Vulkan_RenderTargets);
387 color_view = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT.Value(), false);
388 }
389
390 color_view->Transition(vk::ImageLayout::eTransferDstOptimal,
391 vk::PipelineStageFlagBits::eTransfer,
392 vk::AccessFlagBits::eTransferWrite);
393
394 const std::array clear_color = {regs.clear_color[0], regs.clear_color[1],
395 regs.clear_color[2], regs.clear_color[3]};
396 const vk::ClearColorValue clear(clear_color);
397 scheduler.Record([image = color_view->GetImage(),
398 subresource = color_view->GetImageSubresourceRange(),
399 clear](auto cmdbuf, auto& dld) {
400 cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear, subresource,
401 dld);
402 });
403 }
404 if (use_depth || use_stencil) {
405 View zeta_surface;
406 {
407 MICROPROFILE_SCOPE(Vulkan_RenderTargets);
408 zeta_surface = texture_cache.GetDepthBufferSurface(false);
409 }
410
411 zeta_surface->Transition(vk::ImageLayout::eTransferDstOptimal,
412 vk::PipelineStageFlagBits::eTransfer,
413 vk::AccessFlagBits::eTransferWrite);
414
415 const vk::ClearDepthStencilValue clear(regs.clear_depth,
416 static_cast<u32>(regs.clear_stencil));
417 scheduler.Record([image = zeta_surface->GetImage(),
418 subresource = zeta_surface->GetImageSubresourceRange(),
419 clear](auto cmdbuf, auto& dld) {
420 cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, clear,
421 subresource, dld);
422 });
423 }
424}
425
426void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
427 MICROPROFILE_SCOPE(Vulkan_Compute);
428 update_descriptor_queue.Acquire();
429 sampled_views.clear();
430 image_views.clear();
431
432 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
433 const ComputePipelineCacheKey key{
434 code_addr,
435 launch_desc.shared_alloc,
436 {launch_desc.block_dim_x, launch_desc.block_dim_y, launch_desc.block_dim_z}};
437 auto& pipeline = pipeline_cache.GetComputePipeline(key);
438
439 // Compute dispatches can't be executed inside a renderpass
440 scheduler.RequestOutsideRenderPassOperationContext();
441
442 buffer_cache.Map(CalculateComputeStreamBufferSize());
443
444 const auto& entries = pipeline.GetEntries();
445 SetupComputeConstBuffers(entries);
446 SetupComputeGlobalBuffers(entries);
447 SetupComputeTexelBuffers(entries);
448 SetupComputeTextures(entries);
449 SetupComputeImages(entries);
450
451 buffer_cache.Unmap();
452
453 TransitionImages(sampled_views, vk::PipelineStageFlagBits::eComputeShader,
454 vk::AccessFlagBits::eShaderRead);
455 TransitionImages(image_views, vk::PipelineStageFlagBits::eComputeShader,
456 vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite);
457
458 if (device.IsNvDeviceDiagnosticCheckpoints()) {
459 scheduler.Record(
460 [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(nullptr, dld); });
461 }
462
463 scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y,
464 grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(),
465 layout = pipeline.GetLayout(),
466 descriptor_set = pipeline.CommitDescriptorSet()](auto cmdbuf, auto& dld) {
467 cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline_handle, dld);
468 cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, DESCRIPTOR_SET, 1,
469 &descriptor_set, 0, nullptr, dld);
470 cmdbuf.dispatch(grid_x, grid_y, grid_z, dld);
471 });
472}
473
474void RasterizerVulkan::FlushAll() {}
475
476void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) {
477 texture_cache.FlushRegion(addr, size);
478 buffer_cache.FlushRegion(addr, size);
479}
480
481void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) {
482 texture_cache.InvalidateRegion(addr, size);
483 pipeline_cache.InvalidateRegion(addr, size);
484 buffer_cache.InvalidateRegion(addr, size);
485}
486
487void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
488 FlushRegion(addr, size);
489 InvalidateRegion(addr, size);
490}
491
492void RasterizerVulkan::FlushCommands() {
493 if (draw_counter > 0) {
494 draw_counter = 0;
495 scheduler.Flush();
496 }
497}
498
499void RasterizerVulkan::TickFrame() {
500 draw_counter = 0;
501 update_descriptor_queue.TickFrame();
502 buffer_cache.TickFrame();
503 staging_pool.TickFrame();
504}
505
506bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
507 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
508 const Tegra::Engines::Fermi2D::Config& copy_config) {
509 texture_cache.DoFermiCopy(src, dst, copy_config);
510 return true;
511}
512
513bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
514 VAddr framebuffer_addr, u32 pixel_stride) {
515 if (!framebuffer_addr) {
516 return false;
517 }
518
519 const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)};
520 const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)};
521 if (!surface) {
522 return false;
523 }
524
525 // Verify that the cached surface is the same size and format as the requested framebuffer
526 const auto& params{surface->GetSurfaceParams()};
527 const auto& pixel_format{
528 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
529 ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
530 ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
531
532 screen_info.image = &surface->GetImage();
533 screen_info.width = params.width;
534 screen_info.height = params.height;
535 screen_info.is_srgb = surface->GetSurfaceParams().srgb_conversion;
536 return true;
537}
538
539void RasterizerVulkan::FlushWork() {
540 static constexpr u32 DRAWS_TO_DISPATCH = 4096;
541
542 // Only check multiples of 8 draws
543 static_assert(DRAWS_TO_DISPATCH % 8 == 0);
544 if ((++draw_counter & 7) != 7) {
545 return;
546 }
547
548 if (draw_counter < DRAWS_TO_DISPATCH) {
549 // Send recorded tasks to the worker thread
550 scheduler.DispatchWork();
551 return;
552 }
553
 554 // Otherwise (every DRAWS_TO_DISPATCH draws) flush execution.
 555 // This submits the recorded commands to the Vulkan driver.
556 scheduler.Flush();
557 draw_counter = 0;
558}
559
560RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
561 MICROPROFILE_SCOPE(Vulkan_RenderTargets);
562 auto& dirty = system.GPU().Maxwell3D().dirty;
563 const bool update_rendertargets = dirty.render_settings;
564 dirty.render_settings = false;
565
566 texture_cache.GuardRenderTargets(true);
567
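    // A "texception" is a render target that is also sampled as a texture in this draw; such
    // attachments have to stay in the GENERAL image layout.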
568 Texceptions texceptions;
569 for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
570 if (update_rendertargets) {
571 color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true);
572 }
573 if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
574 texceptions.set(rt);
575 }
576 }
577
578 if (update_rendertargets) {
579 zeta_attachment = texture_cache.GetDepthBufferSurface(true);
580 }
581 if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
582 texceptions.set(ZETA_TEXCEPTION_INDEX);
583 }
584
585 texture_cache.GuardRenderTargets(false);
586
587 return texceptions;
588}
589
590bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachment) {
591 bool overlap = false;
592 for (auto& [view, layout] : sampled_views) {
593 if (!attachment.IsSameSurface(*view)) {
594 continue;
595 }
596 overlap = true;
597 *layout = vk::ImageLayout::eGeneral;
598 }
599 return overlap;
600}
601
602std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffers(
603 vk::RenderPass renderpass) {
604 FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(),
605 std::numeric_limits<u32>::max()};
606
607 const auto MarkAsModifiedAndPush = [&](const View& view) {
608 if (view == nullptr) {
609 return false;
610 }
611 key.views.push_back(view->GetHandle());
612 key.width = std::min(key.width, view->GetWidth());
613 key.height = std::min(key.height, view->GetHeight());
614 return true;
615 };
616
617 for (std::size_t index = 0; index < std::size(color_attachments); ++index) {
618 if (MarkAsModifiedAndPush(color_attachments[index])) {
619 texture_cache.MarkColorBufferInUse(index);
620 }
621 }
622 if (MarkAsModifiedAndPush(zeta_attachment)) {
623 texture_cache.MarkDepthBufferInUse();
624 }
625
626 const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key);
627 auto& framebuffer = fbentry->second;
628 if (is_cache_miss) {
629 const vk::FramebufferCreateInfo framebuffer_ci({}, key.renderpass,
630 static_cast<u32>(key.views.size()),
631 key.views.data(), key.width, key.height, 1);
632 const auto dev = device.GetLogical();
633 const auto& dld = device.GetDispatchLoader();
634 framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld);
635 }
636
637 return {*framebuffer, vk::Extent2D{key.width, key.height}};
638}
639
640RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state,
641 BufferBindings& buffer_bindings,
642 bool is_indexed,
643 bool is_instanced) {
644 MICROPROFILE_SCOPE(Vulkan_Geometry);
645
646 const auto& gpu = system.GPU().Maxwell3D();
647 const auto& regs = gpu.regs;
648
649 SetupVertexArrays(fixed_state.vertex_input, buffer_bindings);
650
651 const u32 base_instance = regs.vb_base_instance;
652 const u32 num_instances = is_instanced ? gpu.mme_draw.instance_count : 1;
653 const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first;
654 const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count;
655
656 DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed};
657 SetupIndexBuffer(buffer_bindings, params, is_indexed);
658
659 return params;
660}
661
662void RasterizerVulkan::SetupShaderDescriptors(
663 const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
664 texture_cache.GuardSamplers(true);
665
666 for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
667 // Skip VertexA stage
668 const auto& shader = shaders[stage + 1];
669 if (!shader) {
670 continue;
671 }
672 const auto& entries = shader->GetEntries();
673 SetupGraphicsConstBuffers(entries, stage);
674 SetupGraphicsGlobalBuffers(entries, stage);
675 SetupGraphicsTexelBuffers(entries, stage);
676 SetupGraphicsTextures(entries, stage);
677 SetupGraphicsImages(entries, stage);
678 }
679 texture_cache.GuardSamplers(false);
680}
681
682void RasterizerVulkan::SetupImageTransitions(
683 Texceptions texceptions, const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
684 const View& zeta_attachment) {
685 TransitionImages(sampled_views, vk::PipelineStageFlagBits::eAllGraphics,
686 vk::AccessFlagBits::eShaderRead);
687 TransitionImages(image_views, vk::PipelineStageFlagBits::eAllGraphics,
688 vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite);
689
690 for (std::size_t rt = 0; rt < std::size(color_attachments); ++rt) {
691 const auto color_attachment = color_attachments[rt];
692 if (color_attachment == nullptr) {
693 continue;
694 }
695 const auto image_layout =
696 texceptions[rt] ? vk::ImageLayout::eGeneral : vk::ImageLayout::eColorAttachmentOptimal;
697 color_attachment->Transition(
698 image_layout, vk::PipelineStageFlagBits::eColorAttachmentOutput,
699 vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite);
700 }
701
702 if (zeta_attachment != nullptr) {
703 const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX]
704 ? vk::ImageLayout::eGeneral
705 : vk::ImageLayout::eDepthStencilAttachmentOptimal;
706 zeta_attachment->Transition(image_layout, vk::PipelineStageFlagBits::eLateFragmentTests,
707 vk::AccessFlagBits::eDepthStencilAttachmentRead |
708 vk::AccessFlagBits::eDepthStencilAttachmentWrite);
709 }
710}
711
712void RasterizerVulkan::UpdateDynamicStates() {
713 auto& gpu = system.GPU().Maxwell3D();
714 UpdateViewportsState(gpu);
715 UpdateScissorsState(gpu);
716 UpdateDepthBias(gpu);
717 UpdateBlendConstants(gpu);
718 UpdateDepthBounds(gpu);
719 UpdateStencilFaces(gpu);
720}
721
722void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
723 BufferBindings& buffer_bindings) {
724 const auto& regs = system.GPU().Maxwell3D().regs;
725
726 for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexAttributes); ++index) {
727 const auto& attrib = regs.vertex_attrib_format[index];
728 if (!attrib.IsValid()) {
729 continue;
730 }
731
732 const auto& buffer = regs.vertex_array[attrib.buffer];
733 ASSERT(buffer.IsEnabled());
734
735 vertex_input.attributes[vertex_input.num_attributes++] =
736 FixedPipelineState::VertexAttribute(index, attrib.buffer, attrib.type, attrib.size,
737 attrib.offset);
738 }
739
740 for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexArrays); ++index) {
741 const auto& vertex_array = regs.vertex_array[index];
742 if (!vertex_array.IsEnabled()) {
743 continue;
744 }
745
746 const GPUVAddr start{vertex_array.StartAddress()};
747 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
748
749 ASSERT(end > start);
750 const std::size_t size{end - start + 1};
751 const auto [buffer, offset] = buffer_cache.UploadMemory(start, size);
752
753 vertex_input.bindings[vertex_input.num_bindings++] = FixedPipelineState::VertexBinding(
754 index, vertex_array.stride,
755 regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0);
756 buffer_bindings.AddVertexBinding(buffer, offset);
757 }
758}
759
760void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params,
761 bool is_indexed) {
762 const auto& regs = system.GPU().Maxwell3D().regs;
763 switch (regs.draw.topology) {
764 case Maxwell::PrimitiveTopology::Quads:
765 if (params.is_indexed) {
766 UNIMPLEMENTED();
767 } else {
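            // Build an index buffer with a compute pass that expands each quad into two
            // triangles (six indices per four vertices).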
768 const auto [buffer, offset] =
769 quad_array_pass.Assemble(params.num_vertices, params.base_vertex);
770 buffer_bindings.SetIndexBinding(&buffer, offset, vk::IndexType::eUint32);
771 params.base_vertex = 0;
772 params.num_vertices = params.num_vertices * 6 / 4;
773 params.is_indexed = true;
774 }
775 break;
776 default: {
777 if (!is_indexed) {
778 break;
779 }
780 const GPUVAddr gpu_addr = regs.index_array.IndexStart();
781 auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
782
783 auto format = regs.index_array.format;
784 const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
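        // Without VK_EXT_index_type_uint8, widen 8-bit indices to 16-bit with a compute pass.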
785 if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) {
786 std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, *buffer, offset);
787 format = Maxwell::IndexFormat::UnsignedShort;
788 }
789
790 buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format));
791 break;
792 }
793 }
794}
795
796void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) {
797 MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
798 const auto& gpu = system.GPU().Maxwell3D();
799 const auto& shader_stage = gpu.state.shader_stages[stage];
800 for (const auto& entry : entries.const_buffers) {
801 SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]);
802 }
803}
804
805void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) {
806 MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
807 auto& gpu{system.GPU()};
808 const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage]};
809
810 for (const auto& entry : entries.global_buffers) {
811 const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset();
812 SetupGlobalBuffer(entry, addr);
813 }
814}
815
816void RasterizerVulkan::SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage) {
817 MICROPROFILE_SCOPE(Vulkan_Textures);
818 const auto& gpu = system.GPU().Maxwell3D();
819 for (const auto& entry : entries.texel_buffers) {
820 const auto image = GetTextureInfo(gpu, entry, stage).tic;
821 SetupTexelBuffer(image, entry);
822 }
823}
824
825void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) {
826 MICROPROFILE_SCOPE(Vulkan_Textures);
827 const auto& gpu = system.GPU().Maxwell3D();
828 for (const auto& entry : entries.samplers) {
829 const auto texture = GetTextureInfo(gpu, entry, stage);
830 SetupTexture(texture, entry);
831 }
832}
833
834void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
835 MICROPROFILE_SCOPE(Vulkan_Images);
 836 const auto& gpu = system.GPU().Maxwell3D();
837 for (const auto& entry : entries.images) {
838 const auto tic = GetTextureInfo(gpu, entry, stage).tic;
839 SetupImage(tic, entry);
840 }
841}
842
843void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
844 MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
845 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
846 for (const auto& entry : entries.const_buffers) {
847 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
848 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
849 Tegra::Engines::ConstBufferInfo buffer;
850 buffer.address = config.Address();
851 buffer.size = config.size;
852 buffer.enabled = mask[entry.GetIndex()];
853 SetupConstBuffer(entry, buffer);
854 }
855}
856
857void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
858 MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
859 const auto cbufs{system.GPU().KeplerCompute().launch_description.const_buffer_config};
860 for (const auto& entry : entries.global_buffers) {
861 const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
862 SetupGlobalBuffer(entry, addr);
863 }
864}
865
866void RasterizerVulkan::SetupComputeTexelBuffers(const ShaderEntries& entries) {
867 MICROPROFILE_SCOPE(Vulkan_Textures);
868 const auto& gpu = system.GPU().KeplerCompute();
869 for (const auto& entry : entries.texel_buffers) {
870 const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
871 SetupTexelBuffer(image, entry);
872 }
873}
874
875void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
876 MICROPROFILE_SCOPE(Vulkan_Textures);
877 const auto& gpu = system.GPU().KeplerCompute();
878 for (const auto& entry : entries.samplers) {
879 const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex);
880 SetupTexture(texture, entry);
881 }
882}
883
884void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
885 MICROPROFILE_SCOPE(Vulkan_Images);
886 const auto& gpu = system.GPU().KeplerCompute();
887 for (const auto& entry : entries.images) {
888 const auto tic = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
889 SetupImage(tic, entry);
890 }
891}
892
893void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
894 const Tegra::Engines::ConstBufferInfo& buffer) {
895 // Align the size to avoid bad std140 interactions
896 const std::size_t size =
897 Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
898 ASSERT(size <= MaxConstbufferSize);
899
900 const auto [buffer_handle, offset] =
901 buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
902
903 update_descriptor_queue.AddBuffer(buffer_handle, offset, size);
904}
905
906void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
907 auto& memory_manager{system.GPU().MemoryManager()};
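    // Global buffers are described in guest memory by a 64-bit GPU address followed by a
    // 32-bit size.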
908 const auto actual_addr = memory_manager.Read<u64>(address);
909 const auto size = memory_manager.Read<u32>(address + 8);
910
911 if (size == 0) {
912 // Sometimes global memory pointers don't have a proper size. Upload a dummy entry because
913 // Vulkan doesn't like empty buffers.
914 constexpr std::size_t dummy_size = 4;
915 const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size);
916 update_descriptor_queue.AddBuffer(buffer, 0, dummy_size);
917 return;
918 }
919
920 const auto [buffer, offset] = buffer_cache.UploadMemory(
921 actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
922 update_descriptor_queue.AddBuffer(buffer, offset, size);
923}
924
925void RasterizerVulkan::SetupTexelBuffer(const Tegra::Texture::TICEntry& tic,
926 const TexelBufferEntry& entry) {
927 const auto view = texture_cache.GetTextureSurface(tic, entry);
928 ASSERT(view->IsBufferView());
929
930 update_descriptor_queue.AddTexelBuffer(view->GetBufferView());
931}
932
933void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& texture,
934 const SamplerEntry& entry) {
935 auto view = texture_cache.GetTextureSurface(texture.tic, entry);
936 ASSERT(!view->IsBufferView());
937
938 const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source,
939 texture.tic.z_source, texture.tic.w_source);
940 const auto sampler = sampler_cache.GetSampler(texture.tsc);
941 update_descriptor_queue.AddSampledImage(sampler, image_view);
942
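    // Keep a pointer to the layout slot in the descriptor queue; it may be rewritten to
    // eGeneral later if this texture aliases a render target (see WalkAttachmentOverlaps).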
943 const auto image_layout = update_descriptor_queue.GetLastImageLayout();
944 *image_layout = vk::ImageLayout::eShaderReadOnlyOptimal;
945 sampled_views.push_back(ImageView{std::move(view), image_layout});
946}
947
948void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) {
949 auto view = texture_cache.GetImageSurface(tic, entry);
950
951 if (entry.IsWritten()) {
952 view->MarkAsModified(texture_cache.Tick());
953 }
954
955 UNIMPLEMENTED_IF(tic.IsBuffer());
956
957 const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
958 update_descriptor_queue.AddImage(image_view);
959
960 const auto image_layout = update_descriptor_queue.GetLastImageLayout();
961 *image_layout = vk::ImageLayout::eGeneral;
962 image_views.push_back(ImageView{std::move(view), image_layout});
963}
964
965void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu) {
966 if (!gpu.dirty.viewport_transform && scheduler.TouchViewports()) {
967 return;
968 }
969 gpu.dirty.viewport_transform = false;
970 const auto& regs = gpu.regs;
971 const std::array viewports{
972 GetViewportState(device, regs, 0), GetViewportState(device, regs, 1),
973 GetViewportState(device, regs, 2), GetViewportState(device, regs, 3),
974 GetViewportState(device, regs, 4), GetViewportState(device, regs, 5),
975 GetViewportState(device, regs, 6), GetViewportState(device, regs, 7),
976 GetViewportState(device, regs, 8), GetViewportState(device, regs, 9),
977 GetViewportState(device, regs, 10), GetViewportState(device, regs, 11),
978 GetViewportState(device, regs, 12), GetViewportState(device, regs, 13),
979 GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)};
980 scheduler.Record([viewports](auto cmdbuf, auto& dld) {
981 cmdbuf.setViewport(0, static_cast<u32>(viewports.size()), viewports.data(), dld);
982 });
983}
984
985void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu) {
986 if (!gpu.dirty.scissor_test && scheduler.TouchScissors()) {
987 return;
988 }
989 gpu.dirty.scissor_test = false;
990 const auto& regs = gpu.regs;
991 const std::array scissors = {
992 GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2),
993 GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5),
994 GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8),
995 GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11),
996 GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14),
997 GetScissorState(regs, 15)};
998 scheduler.Record([scissors](auto cmdbuf, auto& dld) {
999 cmdbuf.setScissor(0, static_cast<u32>(scissors.size()), scissors.data(), dld);
1000 });
1001}
1002
1003void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu) {
1004 if (!gpu.dirty.polygon_offset && scheduler.TouchDepthBias()) {
1005 return;
1006 }
1007 gpu.dirty.polygon_offset = false;
1008 const auto& regs = gpu.regs;
1009 scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp,
1010 factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) {
1011 cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld);
1012 });
1013}
1014
1015void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu) {
1016 if (!gpu.dirty.blend_state && scheduler.TouchBlendConstants()) {
1017 return;
1018 }
1019 gpu.dirty.blend_state = false;
1020 const std::array blend_color = {gpu.regs.blend_color.r, gpu.regs.blend_color.g,
1021 gpu.regs.blend_color.b, gpu.regs.blend_color.a};
1022 scheduler.Record([blend_color](auto cmdbuf, auto& dld) {
1023 cmdbuf.setBlendConstants(blend_color.data(), dld);
1024 });
1025}
1026
1027void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu) {
1028 if (!gpu.dirty.depth_bounds_values && scheduler.TouchDepthBounds()) {
1029 return;
1030 }
1031 gpu.dirty.depth_bounds_values = false;
1032 const auto& regs = gpu.regs;
1033 scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]](
1034 auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); });
1035}
1036
1037void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu) {
1038 if (!gpu.dirty.stencil_test && scheduler.TouchStencilValues()) {
1039 return;
1040 }
1041 gpu.dirty.stencil_test = false;
1042 const auto& regs = gpu.regs;
1043 if (regs.stencil_two_side_enable) {
1044 // Separate values per face
1045 scheduler.Record(
1046 [front_ref = regs.stencil_front_func_ref, front_write_mask = regs.stencil_front_mask,
1047 front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_func_ref,
1048 back_write_mask = regs.stencil_back_mask,
1049 back_test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) {
1050 // Front face
1051 cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFront, front_ref, dld);
1052 cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFront, front_write_mask, dld);
1053 cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFront, front_test_mask, dld);
1054
1055 // Back face
1056 cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eBack, back_ref, dld);
1057 cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eBack, back_write_mask, dld);
1058 cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eBack, back_test_mask, dld);
1059 });
1060 } else {
1061 // Front face defines both faces
1062 scheduler.Record([ref = regs.stencil_back_func_ref, write_mask = regs.stencil_back_mask,
1063 test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) {
1064 cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, ref, dld);
1065 cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, write_mask, dld);
1066 cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, test_mask, dld);
1067 });
1068 }
1069}
1070
1071std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const {
1072 std::size_t size = CalculateVertexArraysSize();
1073 if (is_indexed) {
1074 size = Common::AlignUp(size, 4) + CalculateIndexBufferSize();
1075 }
1076 size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment());
1077 return size;
1078}
1079
1080std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const {
1081 return Tegra::Engines::KeplerCompute::NumConstBuffers *
1082 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
1083}
1084
1085std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
1086 const auto& regs = system.GPU().Maxwell3D().regs;
1087
1088 std::size_t size = 0;
1089 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
1090 // This implementation assumes that all attributes are used in the shader.
1091 const GPUVAddr start{regs.vertex_array[index].StartAddress()};
1092 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
1093 DEBUG_ASSERT(end > start);
1094
1095 size += (end - start + 1) * regs.vertex_array[index].enable;
1096 }
1097 return size;
1098}
1099
1100std::size_t RasterizerVulkan::CalculateIndexBufferSize() const {
1101 const auto& regs = system.GPU().Maxwell3D().regs;
1102 return static_cast<std::size_t>(regs.index_array.count) *
1103 static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
1104}
1105
1106std::size_t RasterizerVulkan::CalculateConstBufferSize(
1107 const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const {
1108 if (entry.IsIndirect()) {
1109 // Buffer is accessed indirectly, so upload the entire thing
1110 return buffer.size;
1111 } else {
1112 // Buffer is accessed directly, upload just what we use
1113 return entry.GetSize();
1114 }
1115}
1116
1117RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const {
1118 using namespace VideoCore::Surface;
1119
1120 const auto& regs = system.GPU().Maxwell3D().regs;
1121 RenderPassParams renderpass_params;
1122
1123 for (std::size_t rt = 0; rt < static_cast<std::size_t>(regs.rt_control.count); ++rt) {
1124 const auto& rendertarget = regs.rt[rt];
1125 if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE)
1126 continue;
1127 renderpass_params.color_attachments.push_back(RenderPassParams::ColorAttachment{
1128 static_cast<u32>(rt), PixelFormatFromRenderTargetFormat(rendertarget.format),
1129 texceptions.test(rt)});
1130 }
1131
1132 renderpass_params.has_zeta = regs.zeta_enable;
1133 if (renderpass_params.has_zeta) {
1134 renderpass_params.zeta_pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
1135 renderpass_params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX];
1136 }
1137
1138 return renderpass_params;
1139}
1140
1141} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index fc324952b..7be71e734 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -4,10 +4,260 @@
4
5#pragma once
6
7#include <array>
8#include <bitset>
9#include <memory>
10#include <utility>
11#include <vector>
12
13#include <boost/container/static_vector.hpp>
14#include <boost/functional/hash.hpp>
15
16#include "common/common_types.h"
17#include "video_core/memory_manager.h"
18#include "video_core/rasterizer_accelerated.h"
19#include "video_core/rasterizer_interface.h"
20#include "video_core/renderer_vulkan/declarations.h"
21#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
22#include "video_core/renderer_vulkan/vk_buffer_cache.h"
23#include "video_core/renderer_vulkan/vk_compute_pass.h"
24#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
25#include "video_core/renderer_vulkan/vk_memory_manager.h"
26#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
27#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
28#include "video_core/renderer_vulkan/vk_resource_manager.h"
29#include "video_core/renderer_vulkan/vk_sampler_cache.h"
30#include "video_core/renderer_vulkan/vk_scheduler.h"
31#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
32#include "video_core/renderer_vulkan/vk_texture_cache.h"
33#include "video_core/renderer_vulkan/vk_update_descriptor.h"
34
35namespace Core {
36class System;
37}
38
39namespace Core::Frontend {
40class EmuWindow;
41}
42
43namespace Tegra::Engines {
44class Maxwell3D;
45}
46
47namespace Vulkan {
48
49struct VKScreenInfo;
50
51using ImageViewsPack =
52 boost::container::static_vector<vk::ImageView, Maxwell::NumRenderTargets + 1>;
53
54struct FramebufferCacheKey {
55 vk::RenderPass renderpass{};
56 u32 width = 0;
57 u32 height = 0;
58 ImageViewsPack views;
59
60 std::size_t Hash() const noexcept {
61 std::size_t hash = 0;
62 boost::hash_combine(hash, static_cast<VkRenderPass>(renderpass));
63 for (const auto& view : views) {
64 boost::hash_combine(hash, static_cast<VkImageView>(view));
65 }
66 boost::hash_combine(hash, width);
67 boost::hash_combine(hash, height);
68 return hash;
69 }
70
71 bool operator==(const FramebufferCacheKey& rhs) const noexcept {
72 return std::tie(renderpass, views, width, height) ==
73 std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height);
74 }
75};
76
77} // namespace Vulkan
78
79namespace std {
80
81template <>
82struct hash<Vulkan::FramebufferCacheKey> {
83 std::size_t operator()(const Vulkan::FramebufferCacheKey& k) const noexcept {
84 return k.Hash();
85 }
86};
87
88} // namespace std
89
90namespace Vulkan {
91
-class RasterizerVulkan : public VideoCore::RasterizerInterface {};
92class BufferBindings;
93
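/// Pairs a texture cache view with a pointer to the image layout slot it occupies in the
/// update descriptor queue, so pending transitions can patch the final layout.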
94struct ImageView {
95 View view;
96 vk::ImageLayout* layout = nullptr;
97};
98
99class RasterizerVulkan : public VideoCore::RasterizerAccelerated {
100public:
101 explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
102 VKScreenInfo& screen_info, const VKDevice& device,
103 VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
104 VKScheduler& scheduler);
105 ~RasterizerVulkan() override;
106
107 bool DrawBatch(bool is_indexed) override;
108 bool DrawMultiBatch(bool is_indexed) override;
109 void Clear() override;
110 void DispatchCompute(GPUVAddr code_addr) override;
111 void FlushAll() override;
112 void FlushRegion(CacheAddr addr, u64 size) override;
113 void InvalidateRegion(CacheAddr addr, u64 size) override;
114 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
115 void FlushCommands() override;
116 void TickFrame() override;
117 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
118 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
119 const Tegra::Engines::Fermi2D::Config& copy_config) override;
120 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
121 u32 pixel_stride) override;
122
123 /// Maximum supported size that a constbuffer can have in bytes.
124 static constexpr std::size_t MaxConstbufferSize = 0x10000;
125 static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
126 "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
127
128private:
129 struct DrawParameters {
130 void Draw(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld) const;
131
132 u32 base_instance = 0;
133 u32 num_instances = 0;
134 u32 base_vertex = 0;
135 u32 num_vertices = 0;
 136 bool is_indexed = false;
137 };
138
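    /// Bitset of "texceptions": render targets that are also sampled as textures in the
    /// current draw. Bit ZETA_TEXCEPTION_INDEX tracks the depth-stencil attachment.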
139 using Texceptions = std::bitset<Maxwell::NumRenderTargets + 1>;
140
141 static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8;
142
143 void Draw(bool is_indexed, bool is_instanced);
144
145 void FlushWork();
146
147 Texceptions UpdateAttachments();
148
149 std::tuple<vk::Framebuffer, vk::Extent2D> ConfigureFramebuffers(vk::RenderPass renderpass);
150
 151 /// Sets up geometry buffers and state.
152 DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
153 bool is_indexed, bool is_instanced);
154
155 /// Setup descriptors in the graphics pipeline.
156 void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders);
157
158 void SetupImageTransitions(Texceptions texceptions,
159 const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
160 const View& zeta_attachment);
161
162 void UpdateDynamicStates();
163
164 bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
165
166 void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
167 BufferBindings& buffer_bindings);
168
169 void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
170
171 /// Setup constant buffers in the graphics pipeline.
172 void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage);
173
174 /// Setup global buffers in the graphics pipeline.
175 void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
176
177 /// Setup texel buffers in the graphics pipeline.
178 void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage);
179
180 /// Setup textures in the graphics pipeline.
181 void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
182
183 /// Setup images in the graphics pipeline.
184 void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
185
186 /// Setup constant buffers in the compute pipeline.
187 void SetupComputeConstBuffers(const ShaderEntries& entries);
188
189 /// Setup global buffers in the compute pipeline.
190 void SetupComputeGlobalBuffers(const ShaderEntries& entries);
191
192 /// Setup texel buffers in the compute pipeline.
193 void SetupComputeTexelBuffers(const ShaderEntries& entries);
194
195 /// Setup textures in the compute pipeline.
196 void SetupComputeTextures(const ShaderEntries& entries);
197
198 /// Setup images in the compute pipeline.
199 void SetupComputeImages(const ShaderEntries& entries);
200
201 void SetupConstBuffer(const ConstBufferEntry& entry,
202 const Tegra::Engines::ConstBufferInfo& buffer);
203
204 void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
205
206 void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry);
207
208 void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry);
209
210 void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
211
212 void UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu);
213 void UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu);
214 void UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu);
215 void UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu);
216 void UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu);
217 void UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu);
218
219 std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
220
221 std::size_t CalculateComputeStreamBufferSize() const;
222
223 std::size_t CalculateVertexArraysSize() const;
224
225 std::size_t CalculateIndexBufferSize() const;
226
227 std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
228 const Tegra::Engines::ConstBufferInfo& buffer) const;
229
230 RenderPassParams GetRenderPassParams(Texceptions texceptions) const;
231
232 Core::System& system;
233 Core::Frontend::EmuWindow& render_window;
234 VKScreenInfo& screen_info;
235 const VKDevice& device;
236 VKResourceManager& resource_manager;
237 VKMemoryManager& memory_manager;
238 VKScheduler& scheduler;
239
240 VKStagingBufferPool staging_pool;
241 VKDescriptorPool descriptor_pool;
242 VKUpdateDescriptorQueue update_descriptor_queue;
243 QuadArrayPass quad_array_pass;
244 Uint8Pass uint8_pass;
245
246 VKTextureCache texture_cache;
247 VKPipelineCache pipeline_cache;
248 VKBufferCache buffer_cache;
249 VKSamplerCache sampler_cache;
250
251 std::array<View, Maxwell::NumRenderTargets> color_attachments;
252 View zeta_attachment;
253
254 std::vector<ImageView> sampled_views;
255 std::vector<ImageView> image_views;
256
257 u32 draw_counter = 0;
258
259 // TODO(Rodrigo): Invalidate on image destruction
260 std::unordered_map<FramebufferCacheKey, UniqueFramebuffer> framebuffer_cache;
261};
262
263} // namespace Vulkan