-rw-r--r--  src/video_core/CMakeLists.txt                    |    1
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp | 1135
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h   |  251
3 files changed, 1386 insertions(+), 1 deletion(-)
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index d7719eed9..12c46e86f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -172,6 +172,7 @@ if (ENABLE_VULKAN)
172 renderer_vulkan/vk_memory_manager.h
173 renderer_vulkan/vk_pipeline_cache.cpp
174 renderer_vulkan/vk_pipeline_cache.h
175 renderer_vulkan/vk_rasterizer.cpp
176 renderer_vulkan/vk_rasterizer.h
177 renderer_vulkan/vk_renderpass_cache.cpp
178 renderer_vulkan/vk_renderpass_cache.h
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
new file mode 100644
index 000000000..23252e658
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -0,0 +1,1135 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <memory>
8#include <mutex>
9#include <vector>
10
11#include <boost/container/static_vector.hpp>
12#include <boost/functional/hash.hpp>
13
14#include "common/alignment.h"
15#include "common/assert.h"
16#include "common/logging/log.h"
17#include "common/microprofile.h"
18#include "core/core.h"
19#include "core/memory.h"
20#include "video_core/engines/kepler_compute.h"
21#include "video_core/engines/maxwell_3d.h"
22#include "video_core/renderer_vulkan/declarations.h"
23#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
24#include "video_core/renderer_vulkan/maxwell_to_vk.h"
25#include "video_core/renderer_vulkan/renderer_vulkan.h"
26#include "video_core/renderer_vulkan/vk_buffer_cache.h"
27#include "video_core/renderer_vulkan/vk_compute_pass.h"
28#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
29#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
30#include "video_core/renderer_vulkan/vk_device.h"
31#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
32#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
33#include "video_core/renderer_vulkan/vk_rasterizer.h"
34#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
35#include "video_core/renderer_vulkan/vk_resource_manager.h"
36#include "video_core/renderer_vulkan/vk_sampler_cache.h"
37#include "video_core/renderer_vulkan/vk_scheduler.h"
38#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
39#include "video_core/renderer_vulkan/vk_texture_cache.h"
40#include "video_core/renderer_vulkan/vk_update_descriptor.h"
41
42namespace Vulkan {
43
44using Maxwell = Tegra::Engines::Maxwell3D::Regs;
45
46MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192));
47MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128));
48MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128));
49MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128));
50MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128));
51MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128));
52MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128));
53MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128));
54MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128));
55MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128));
56MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128));
57
58namespace {
59
60constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::ShaderType::Compute);
61
62vk::Viewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) {
63 const auto& viewport = regs.viewport_transform[index];
64 const float x = viewport.translate_x - viewport.scale_x;
65 const float y = viewport.translate_y - viewport.scale_y;
66 const float width = viewport.scale_x * 2.0f;
67 const float height = viewport.scale_y * 2.0f;
68
69 const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
70 float near = viewport.translate_z - viewport.scale_z * reduce_z;
71 float far = viewport.translate_z + viewport.scale_z;
72 if (!device.IsExtDepthRangeUnrestrictedSupported()) {
73 near = std::clamp(near, 0.0f, 1.0f);
74 far = std::clamp(far, 0.0f, 1.0f);
75 }
76
77 return vk::Viewport(x, y, width != 0 ? width : 1.0f, height != 0 ? height : 1.0f, near, far);
78}
79
80constexpr vk::Rect2D GetScissorState(const Maxwell& regs, std::size_t index) {
81 const auto& scissor = regs.scissor_test[index];
82 if (!scissor.enable) {
83 return {{0, 0}, {INT32_MAX, INT32_MAX}};
84 }
85 const u32 width = scissor.max_x - scissor.min_x;
86 const u32 height = scissor.max_y - scissor.min_y;
87 return {{static_cast<s32>(scissor.min_x), static_cast<s32>(scissor.min_y)}, {width, height}};
88}
89
90std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
91 const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
92 std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
93 for (std::size_t i = 0; i < std::size(addresses); ++i) {
94 addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
95 }
96 return addresses;
97}
98
99void TransitionImages(const std::vector<ImageView>& views, vk::PipelineStageFlags pipeline_stage,
100 vk::AccessFlags access) {
101 for (auto& [view, layout] : views) {
102 view->Transition(*layout, pipeline_stage, access);
103 }
104}
105
106template <typename Engine, typename Entry>
107Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
108 std::size_t stage) {
109 const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
110 if (entry.IsBindless()) {
111 const Tegra::Texture::TextureHandle tex_handle =
112 engine.AccessConstBuffer32(stage_type, entry.GetBuffer(), entry.GetOffset());
113 return engine.GetTextureInfo(tex_handle);
114 }
115 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
116 return engine.GetStageTexture(stage_type, entry.GetOffset());
117 } else {
118 return engine.GetTexture(entry.GetOffset());
119 }
120}
121
122} // Anonymous namespace
123
124class BufferBindings final {
125public:
126 void AddVertexBinding(const vk::Buffer* buffer, vk::DeviceSize offset) {
127 vertex.buffer_ptrs[vertex.num_buffers] = buffer;
128 vertex.offsets[vertex.num_buffers] = offset;
129 ++vertex.num_buffers;
130 }
131
132 void SetIndexBinding(const vk::Buffer* buffer, vk::DeviceSize offset, vk::IndexType type) {
133 index.buffer = buffer;
134 index.offset = offset;
135 index.type = type;
136 }
137
138 void Bind(VKScheduler& scheduler) const {
139 // Use this large switch case to avoid capturing more memory in the record lambda than
140 // we actually need. It looks horrible, but it's the best we can do with standard C++.
141 switch (vertex.num_buffers) {
142 case 0:
143 return BindStatic<0>(scheduler);
144 case 1:
145 return BindStatic<1>(scheduler);
146 case 2:
147 return BindStatic<2>(scheduler);
148 case 3:
149 return BindStatic<3>(scheduler);
150 case 4:
151 return BindStatic<4>(scheduler);
152 case 5:
153 return BindStatic<5>(scheduler);
154 case 6:
155 return BindStatic<6>(scheduler);
156 case 7:
157 return BindStatic<7>(scheduler);
158 case 8:
159 return BindStatic<8>(scheduler);
160 case 9:
161 return BindStatic<9>(scheduler);
162 case 10:
163 return BindStatic<10>(scheduler);
164 case 11:
165 return BindStatic<11>(scheduler);
166 case 12:
167 return BindStatic<12>(scheduler);
168 case 13:
169 return BindStatic<13>(scheduler);
170 case 14:
171 return BindStatic<14>(scheduler);
172 case 15:
173 return BindStatic<15>(scheduler);
174 case 16:
175 return BindStatic<16>(scheduler);
176 case 17:
177 return BindStatic<17>(scheduler);
178 case 18:
179 return BindStatic<18>(scheduler);
180 case 19:
181 return BindStatic<19>(scheduler);
182 case 20:
183 return BindStatic<20>(scheduler);
184 case 21:
185 return BindStatic<21>(scheduler);
186 case 22:
187 return BindStatic<22>(scheduler);
188 case 23:
189 return BindStatic<23>(scheduler);
190 case 24:
191 return BindStatic<24>(scheduler);
192 case 25:
193 return BindStatic<25>(scheduler);
194 case 26:
195 return BindStatic<26>(scheduler);
196 case 27:
197 return BindStatic<27>(scheduler);
198 case 28:
199 return BindStatic<28>(scheduler);
200 case 29:
201 return BindStatic<29>(scheduler);
202 case 30:
203 return BindStatic<30>(scheduler);
204 case 31:
205 return BindStatic<31>(scheduler);
206 }
207 UNREACHABLE();
208 }
209
210private:
211 // Some of these fields are intentionally left uninitialized to avoid initializing them twice.
212 struct {
213 std::size_t num_buffers = 0;
214 std::array<const vk::Buffer*, Maxwell::NumVertexArrays> buffer_ptrs;
215 std::array<vk::DeviceSize, Maxwell::NumVertexArrays> offsets;
216 } vertex;
217
218 struct {
219 const vk::Buffer* buffer = nullptr;
220 vk::DeviceSize offset;
221 vk::IndexType type;
222 } index;
223
224 template <std::size_t N>
225 void BindStatic(VKScheduler& scheduler) const {
226 if (index.buffer != nullptr) {
227 BindStatic<N, true>(scheduler);
228 } else {
229 BindStatic<N, false>(scheduler);
230 }
231 }
232
233 template <std::size_t N, bool is_indexed>
234 void BindStatic(VKScheduler& scheduler) const {
235 static_assert(N <= Maxwell::NumVertexArrays);
236 if constexpr (N == 0) {
237 return;
238 }
239
240 std::array<vk::Buffer, N> buffers;
241 std::transform(vertex.buffer_ptrs.begin(), vertex.buffer_ptrs.begin() + N, buffers.begin(),
242 [](const auto ptr) { return *ptr; });
243
244 std::array<vk::DeviceSize, N> offsets;
245 std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin());
246
247 if constexpr (is_indexed) {
248 // Indexed draw
249 scheduler.Record([buffers, offsets, index_buffer = *index.buffer,
250 index_offset = index.offset,
251 index_type = index.type](auto cmdbuf, auto& dld) {
252 cmdbuf.bindIndexBuffer(index_buffer, index_offset, index_type, dld);
253 cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(),
254 dld);
255 });
256 } else {
257 // Array draw
258 scheduler.Record([buffers, offsets](auto cmdbuf, auto& dld) {
259 cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(),
260 dld);
261 });
262 }
263 }
264};
265
266void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf,
267 const vk::DispatchLoaderDynamic& dld) const {
268 if (is_indexed) {
269 cmdbuf.drawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance, dld);
270 } else {
271 cmdbuf.draw(num_vertices, num_instances, base_vertex, base_instance, dld);
272 }
273}
274
275RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer,
276 VKScreenInfo& screen_info, const VKDevice& device,
277 VKResourceManager& resource_manager,
278 VKMemoryManager& memory_manager, VKScheduler& scheduler)
279 : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer},
280 screen_info{screen_info}, device{device}, resource_manager{resource_manager},
281 memory_manager{memory_manager}, scheduler{scheduler},
282 staging_pool(device, memory_manager, scheduler), descriptor_pool(device),
283 update_descriptor_queue(device, scheduler),
284 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
285 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
286 texture_cache(system, *this, device, resource_manager, memory_manager, scheduler,
287 staging_pool),
288 pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue),
289 buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
290 sampler_cache(device) {}
291
292RasterizerVulkan::~RasterizerVulkan() = default;
293
294bool RasterizerVulkan::DrawBatch(bool is_indexed) {
295 Draw(is_indexed, false);
296 return true;
297}
298
299bool RasterizerVulkan::DrawMultiBatch(bool is_indexed) {
300 Draw(is_indexed, true);
301 return true;
302}
303
304void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
305 MICROPROFILE_SCOPE(Vulkan_Drawing);
306
307 FlushWork();
308
309 const auto& gpu = system.GPU().Maxwell3D();
310 GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)};
311
312 buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));
313
314 BufferBindings buffer_bindings;
315 const DrawParameters draw_params =
316 SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced);
317
318 update_descriptor_queue.Acquire();
319 sampled_views.clear();
320 image_views.clear();
321
322 const auto shaders = pipeline_cache.GetShaders();
323 key.shaders = GetShaderAddresses(shaders);
324 SetupShaderDescriptors(shaders);
325
326 buffer_cache.Unmap();
327
328 const auto texceptions = UpdateAttachments();
329 SetupImageTransitions(texceptions, color_attachments, zeta_attachment);
330
331 key.renderpass_params = GetRenderPassParams(texceptions);
332
333 auto& pipeline = pipeline_cache.GetGraphicsPipeline(key);
334 scheduler.BindGraphicsPipeline(pipeline.GetHandle());
335
336 const auto renderpass = pipeline.GetRenderPass();
337 const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
338 scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr});
339
340 UpdateDynamicStates();
341
342 buffer_bindings.Bind(scheduler);
343
344 if (device.IsNvDeviceDiagnosticCheckpoints()) {
345 scheduler.Record(
346 [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); });
347 }
348
349 const auto pipeline_layout = pipeline.GetLayout();
350 const auto descriptor_set = pipeline.CommitDescriptorSet();
351 scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) {
352 if (descriptor_set) {
353 cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipeline_layout,
354 DESCRIPTOR_SET, 1, &descriptor_set, 0, nullptr, dld);
355 }
356 draw_params.Draw(cmdbuf, dld);
357 });
358}
359
360void RasterizerVulkan::Clear() {
361 MICROPROFILE_SCOPE(Vulkan_Clearing);
362
363 const auto& gpu = system.GPU().Maxwell3D();
364 if (!gpu.ShouldExecute()) {
365 return;
366 }
367
368 const auto& regs = gpu.regs;
369 const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
370 regs.clear_buffers.A;
371 const bool use_depth = regs.clear_buffers.Z;
372 const bool use_stencil = regs.clear_buffers.S;
373 if (!use_color && !use_depth && !use_stencil) {
374 return;
375 }
376 // Clearing images requires being outside of a renderpass
377 scheduler.RequestOutsideRenderPassOperationContext();
378
379 // TODO(Rodrigo): Implement clears by rendering a quad or by beginning a renderpass.
380
381 if (use_color) {
382 View color_view;
383 {
384 MICROPROFILE_SCOPE(Vulkan_RenderTargets);
385 color_view = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT.Value(), false);
386 }
387
388 color_view->Transition(vk::ImageLayout::eTransferDstOptimal,
389 vk::PipelineStageFlagBits::eTransfer,
390 vk::AccessFlagBits::eTransferWrite);
391
392 const std::array clear_color = {regs.clear_color[0], regs.clear_color[1],
393 regs.clear_color[2], regs.clear_color[3]};
394 const vk::ClearColorValue clear(clear_color);
395 scheduler.Record([image = color_view->GetImage(),
396 subresource = color_view->GetImageSubresourceRange(),
397 clear](auto cmdbuf, auto& dld) {
398 cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear, subresource,
399 dld);
400 });
401 }
402 if (use_depth || use_stencil) {
403 View zeta_surface;
404 {
405 MICROPROFILE_SCOPE(Vulkan_RenderTargets);
406 zeta_surface = texture_cache.GetDepthBufferSurface(false);
407 }
408
409 zeta_surface->Transition(vk::ImageLayout::eTransferDstOptimal,
410 vk::PipelineStageFlagBits::eTransfer,
411 vk::AccessFlagBits::eTransferWrite);
412
413 const vk::ClearDepthStencilValue clear(regs.clear_depth,
414 static_cast<u32>(regs.clear_stencil));
415 scheduler.Record([image = zeta_surface->GetImage(),
416 subresource = zeta_surface->GetImageSubresourceRange(),
417 clear](auto cmdbuf, auto& dld) {
418 cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, clear,
419 subresource, dld);
420 });
421 }
422}
423
424void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
425 MICROPROFILE_SCOPE(Vulkan_Compute);
426 update_descriptor_queue.Acquire();
427 sampled_views.clear();
428 image_views.clear();
429
430 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
431 const ComputePipelineCacheKey key{
432 code_addr,
433 launch_desc.shared_alloc,
434 {launch_desc.block_dim_x, launch_desc.block_dim_y, launch_desc.block_dim_z}};
435 auto& pipeline = pipeline_cache.GetComputePipeline(key);
436
437 // Compute dispatches can't be executed inside a renderpass
438 scheduler.RequestOutsideRenderPassOperationContext();
439
440 buffer_cache.Map(CalculateComputeStreamBufferSize());
441
442 const auto& entries = pipeline.GetEntries();
443 SetupComputeConstBuffers(entries);
444 SetupComputeGlobalBuffers(entries);
445 SetupComputeTexelBuffers(entries);
446 SetupComputeTextures(entries);
447 SetupComputeImages(entries);
448
449 buffer_cache.Unmap();
450
451 TransitionImages(sampled_views, vk::PipelineStageFlagBits::eComputeShader,
452 vk::AccessFlagBits::eShaderRead);
453 TransitionImages(image_views, vk::PipelineStageFlagBits::eComputeShader,
454 vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite);
455
456 if (device.IsNvDeviceDiagnosticCheckpoints()) {
457 scheduler.Record(
458 [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); });
459 }
460
461 scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y,
462 grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(),
463 layout = pipeline.GetLayout(),
464 descriptor_set = pipeline.CommitDescriptorSet()](auto cmdbuf, auto& dld) {
465 cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline_handle, dld);
466 cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, DESCRIPTOR_SET, 1,
467 &descriptor_set, 0, nullptr, dld);
468 cmdbuf.dispatch(grid_x, grid_y, grid_z, dld);
469 });
470}
471
472void RasterizerVulkan::FlushAll() {}
473
474void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) {
475 texture_cache.FlushRegion(addr, size);
476 buffer_cache.FlushRegion(addr, size);
477}
478
479void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) {
480 texture_cache.InvalidateRegion(addr, size);
481 pipeline_cache.InvalidateRegion(addr, size);
482 buffer_cache.InvalidateRegion(addr, size);
483}
484
485void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
486 FlushRegion(addr, size);
487 InvalidateRegion(addr, size);
488}
489
490void RasterizerVulkan::FlushCommands() {
491 if (draw_counter > 0) {
492 draw_counter = 0;
493 scheduler.Flush();
494 }
495}
496
497void RasterizerVulkan::TickFrame() {
498 draw_counter = 0;
499 update_descriptor_queue.TickFrame();
500 buffer_cache.TickFrame();
501 staging_pool.TickFrame();
502}
503
504bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
505 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
506 const Tegra::Engines::Fermi2D::Config& copy_config) {
507 texture_cache.DoFermiCopy(src, dst, copy_config);
508 return true;
509}
510
511bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
512 VAddr framebuffer_addr, u32 pixel_stride) {
513 if (!framebuffer_addr) {
514 return false;
515 }
516
517 const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)};
518 const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)};
519 if (!surface) {
520 return false;
521 }
522
523 // Verify that the cached surface is the same size and format as the requested framebuffer
524 const auto& params{surface->GetSurfaceParams()};
525 const auto& pixel_format{
526 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
527 ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
528 ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
529 // ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different");
530
531 screen_info.image = &surface->GetImage();
532 screen_info.width = params.width;
533 screen_info.height = params.height;
534 screen_info.is_srgb = surface->GetSurfaceParams().srgb_conversion;
535 return true;
536}
537
538void RasterizerVulkan::FlushWork() {
539 if ((++draw_counter & 7) != 7) {
540 return;
541 }
542 if (draw_counter < 4096) {
543 // Flush work to the worker thread every 8 draws
544 scheduler.DispatchWork();
545 } else {
546 // Flush work to the GPU (and implicitly the worker thread) every 4096 draws
547 scheduler.Flush();
548 draw_counter = 0;
549 }
550}
551
552RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
553 MICROPROFILE_SCOPE(Vulkan_RenderTargets);
554 auto& dirty = system.GPU().Maxwell3D().dirty;
555 const bool update_rendertargets = dirty.render_settings;
556 dirty.render_settings = false;
557
558 texture_cache.GuardRenderTargets(true);
559
560 Texceptions texceptions;
561 for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
562 if (update_rendertargets) {
563 color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true);
564 }
565 if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
566 texceptions.set(rt);
567 }
568 }
569
570 if (update_rendertargets) {
571 zeta_attachment = texture_cache.GetDepthBufferSurface(true);
572 }
573 if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
574 texceptions.set(ZETA_TEXCEPTION_INDEX);
575 }
576
577 texture_cache.GuardRenderTargets(false);
578
579 return texceptions;
580}
581
582bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachment) {
583 bool overlap = false;
584 for (auto& [view, layout] : sampled_views) {
585 if (!attachment.IsSameSurface(*view)) {
586 continue;
587 }
588 overlap = true;
589 *layout = vk::ImageLayout::eGeneral;
590 }
591 return overlap;
592}
593
594std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffers(
595 vk::RenderPass renderpass) {
596 FramebufferCacheKey fbkey;
597 fbkey.renderpass = renderpass;
598 fbkey.width = std::numeric_limits<u32>::max();
599 fbkey.height = std::numeric_limits<u32>::max();
600
601 const auto MarkAsModifiedAndPush = [&](const View& view) {
602 if (view == nullptr) {
603 return false;
604 }
605 fbkey.views.push_back(view->GetHandle());
606 fbkey.width = std::min(fbkey.width, view->GetWidth());
607 fbkey.height = std::min(fbkey.height, view->GetHeight());
608 return true;
609 };
610
611 for (std::size_t index = 0; index < std::size(color_attachments); ++index) {
612 if (MarkAsModifiedAndPush(color_attachments[index])) {
613 texture_cache.MarkColorBufferInUse(index);
614 }
615 }
616 if (MarkAsModifiedAndPush(zeta_attachment)) {
617 texture_cache.MarkDepthBufferInUse();
618 }
619
620 const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(fbkey);
621 auto& framebuffer = fbentry->second;
622 if (is_cache_miss) {
623 const vk::FramebufferCreateInfo framebuffer_ci(
624 {}, fbkey.renderpass, static_cast<u32>(fbkey.views.size()), fbkey.views.data(),
625 fbkey.width, fbkey.height, 1);
626 const auto dev = device.GetLogical();
627 const auto& dld = device.GetDispatchLoader();
628 framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld);
629 }
630
631 return {*framebuffer, vk::Extent2D{fbkey.width, fbkey.height}};
632}
633
634RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state,
635 BufferBindings& buffer_bindings,
636 bool is_indexed,
637 bool is_instanced) {
638 MICROPROFILE_SCOPE(Vulkan_Geometry);
639
640 const auto& gpu = system.GPU().Maxwell3D();
641 const auto& regs = gpu.regs;
642
643 SetupVertexArrays(fixed_state.vertex_input, buffer_bindings);
644
645 const u32 base_instance = regs.vb_base_instance;
646 const u32 num_instances = is_instanced ? gpu.mme_draw.instance_count : 1;
647 const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first;
648 const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count;
649
650 DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed};
651 SetupIndexBuffer(buffer_bindings, params, is_indexed);
652
653 return params;
654}
655
656void RasterizerVulkan::SetupShaderDescriptors(
657 const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
658 texture_cache.GuardSamplers(true);
659
660 for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
661 // Skip VertexA stage
662 const auto& shader = shaders[stage + 1];
663 if (!shader) {
664 continue;
665 }
666 const auto& entries = shader->GetEntries();
667 SetupGraphicsConstBuffers(entries, stage);
668 SetupGraphicsGlobalBuffers(entries, stage);
669 SetupGraphicsTexelBuffers(entries, stage);
670 SetupGraphicsTextures(entries, stage);
671 SetupGraphicsImages(entries, stage);
672 }
673 texture_cache.GuardSamplers(false);
674}
675
676void RasterizerVulkan::SetupImageTransitions(
677 Texceptions texceptions, const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
678 const View& zeta_attachment) {
679 TransitionImages(sampled_views, vk::PipelineStageFlagBits::eAllGraphics,
680 vk::AccessFlagBits::eShaderRead);
681 TransitionImages(image_views, vk::PipelineStageFlagBits::eAllGraphics,
682 vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite);
683
684 for (std::size_t rt = 0; rt < std::size(color_attachments); ++rt) {
685 const auto color_attachment = color_attachments[rt];
686 if (color_attachment == nullptr) {
687 continue;
688 }
689 const auto image_layout =
690 texceptions[rt] ? vk::ImageLayout::eGeneral : vk::ImageLayout::eColorAttachmentOptimal;
691 color_attachment->Transition(
692 image_layout, vk::PipelineStageFlagBits::eColorAttachmentOutput,
693 vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite);
694 }
695
696 if (zeta_attachment != nullptr) {
697 const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX]
698 ? vk::ImageLayout::eGeneral
699 : vk::ImageLayout::eDepthStencilAttachmentOptimal;
700 zeta_attachment->Transition(image_layout, vk::PipelineStageFlagBits::eLateFragmentTests,
701 vk::AccessFlagBits::eDepthStencilAttachmentRead |
702 vk::AccessFlagBits::eDepthStencilAttachmentWrite);
703 }
704}
705
706void RasterizerVulkan::UpdateDynamicStates() {
707 auto& gpu = system.GPU().Maxwell3D();
708 UpdateViewportsState(gpu);
709 UpdateScissorsState(gpu);
710 UpdateDepthBias(gpu);
711 UpdateBlendConstants(gpu);
712 UpdateDepthBounds(gpu);
713 UpdateStencilFaces(gpu);
714}
715
716void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
717 BufferBindings& buffer_bindings) {
718 const auto& regs = system.GPU().Maxwell3D().regs;
719
720 for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexAttributes); ++index) {
721 const auto& attrib = regs.vertex_attrib_format[index];
722 if (!attrib.IsValid()) {
723 continue;
724 }
725
726 const auto& buffer = regs.vertex_array[attrib.buffer];
727 ASSERT(buffer.IsEnabled());
728
729 vertex_input.attributes[vertex_input.num_attributes++] =
730 FixedPipelineState::VertexAttribute(index, attrib.buffer, attrib.type, attrib.size,
731 attrib.offset);
732 }
733
734 for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexArrays); ++index) {
735 const auto& vertex_array = regs.vertex_array[index];
736 if (!vertex_array.IsEnabled()) {
737 continue;
738 }
739
740 const GPUVAddr start{vertex_array.StartAddress()};
741 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
742
743 ASSERT(end > start);
744 const std::size_t size{end - start + 1};
745 const auto [buffer, offset] = buffer_cache.UploadMemory(start, size);
746
747 vertex_input.bindings[vertex_input.num_bindings++] = FixedPipelineState::VertexBinding(
748 index, vertex_array.stride,
749 regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0);
750 buffer_bindings.AddVertexBinding(buffer, offset);
751 }
752}
753
754void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params,
755 bool is_indexed) {
756 const auto& regs = system.GPU().Maxwell3D().regs;
757 switch (regs.draw.topology) {
758 case Maxwell::PrimitiveTopology::Quads:
759 if (params.is_indexed) {
760 UNIMPLEMENTED();
761 } else {
762 const auto [buffer, offset] =
763 quad_array_pass.Assemble(params.num_vertices, params.base_vertex);
764 buffer_bindings.SetIndexBinding(&buffer, offset, vk::IndexType::eUint32);
765 params.base_vertex = 0;
766 params.num_vertices = params.num_vertices * 6 / 4;
767 params.is_indexed = true;
768 }
769 break;
770 default: {
771 if (!is_indexed) {
772 break;
773 }
774 auto [buffer, offset] =
775 buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize());
776
777 auto format = regs.index_array.format;
778 const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
779 if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) {
780 std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, *buffer, offset);
781 format = Maxwell::IndexFormat::UnsignedShort;
782 }
783
784 buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format));
785 break;
786 }
787 }
788}
789
790void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) {
791 MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
792 const auto& gpu = system.GPU().Maxwell3D();
793 const auto& shader_stage = gpu.state.shader_stages[stage];
794 for (const auto& entry : entries.const_buffers) {
795 SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]);
796 }
797}
798
799void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) {
800 MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
801 auto& gpu{system.GPU()};
802 const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage]};
803
804 for (const auto& entry : entries.global_buffers) {
805 const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset();
806 SetupGlobalBuffer(entry, addr);
807 }
808}
809
810void RasterizerVulkan::SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage) {
811 MICROPROFILE_SCOPE(Vulkan_Textures);
812 const auto& gpu = system.GPU().Maxwell3D();
813 for (const auto& entry : entries.texel_buffers) {
814 const auto image = GetTextureInfo(gpu, entry, stage).tic;
815 SetupTexelBuffer(image, entry);
816 }
817}
818
819void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) {
820 MICROPROFILE_SCOPE(Vulkan_Textures);
821 const auto& gpu = system.GPU().Maxwell3D();
822 for (const auto& entry : entries.samplers) {
823 const auto texture = GetTextureInfo(gpu, entry, stage);
824 SetupTexture(texture, entry);
825 }
826}
827
828void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
829 MICROPROFILE_SCOPE(Vulkan_Images);
830 const auto& gpu = system.GPU().Maxwell3D();
831 for (const auto& entry : entries.images) {
832 const auto tic = GetTextureInfo(gpu, entry, stage).tic;
833 SetupImage(tic, entry);
834 }
835}
836
837void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
838 MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
839 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
840 for (const auto& entry : entries.const_buffers) {
841 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
842 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
843 Tegra::Engines::ConstBufferInfo buffer;
844 buffer.address = config.Address();
845 buffer.size = config.size;
846 buffer.enabled = mask[entry.GetIndex()];
847 SetupConstBuffer(entry, buffer);
848 }
849}
850
851void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
852 MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
853 const auto cbufs{system.GPU().KeplerCompute().launch_description.const_buffer_config};
854 for (const auto& entry : entries.global_buffers) {
855 const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
856 SetupGlobalBuffer(entry, addr);
857 }
858}
859
860void RasterizerVulkan::SetupComputeTexelBuffers(const ShaderEntries& entries) {
861 MICROPROFILE_SCOPE(Vulkan_Textures);
862 const auto& gpu = system.GPU().KeplerCompute();
863 for (const auto& entry : entries.texel_buffers) {
864 const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
865 SetupTexelBuffer(image, entry);
866 }
867}
868
869void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
870 MICROPROFILE_SCOPE(Vulkan_Textures);
871 const auto& gpu = system.GPU().KeplerCompute();
872 for (const auto& entry : entries.samplers) {
873 const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex);
874 SetupTexture(texture, entry);
875 }
876}
877
878void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
879 MICROPROFILE_SCOPE(Vulkan_Images);
880 const auto& gpu = system.GPU().KeplerCompute();
881 for (const auto& entry : entries.images) {
882 const auto tic = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
883 SetupImage(tic, entry);
884 }
885}
886
887void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
888 const Tegra::Engines::ConstBufferInfo& buffer) {
889 // Align the size to avoid bad std140 interactions
890 const std::size_t size =
891 Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
892 ASSERT(size <= MaxConstbufferSize);
893
894 const auto [buffer_handle, offset] =
895 buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
896
897 update_descriptor_queue.AddBuffer(buffer_handle, offset, size);
898}
899
900void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
901 auto& memory_manager{system.GPU().MemoryManager()};
902 const auto actual_addr = memory_manager.Read<u64>(address);
903 const auto size = memory_manager.Read<u32>(address + 8);
904
905 if (size == 0) {
906 // Sometimes global memory pointers don't have a proper size. Upload a dummy entry because
907 // Vulkan doesn't like empty buffers.
908 constexpr std::size_t dummy_size = 4;
909 const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size);
910 update_descriptor_queue.AddBuffer(buffer, 0, dummy_size);
911 return;
912 }
913
914 const auto [buffer, offset] = buffer_cache.UploadMemory(
915 actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
916 update_descriptor_queue.AddBuffer(buffer, offset, size);
917}
918
919void RasterizerVulkan::SetupTexelBuffer(const Tegra::Texture::TICEntry& tic,
920 const TexelBufferEntry& entry) {
921 auto view = texture_cache.GetTextureSurface(tic, entry);
922 ASSERT(view->IsBufferView());
923
924 update_descriptor_queue.AddTexelBuffer(view->GetBufferView());
925}
926
927void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& texture,
928 const SamplerEntry& entry) {
929 auto view = texture_cache.GetTextureSurface(texture.tic, entry);
930 ASSERT(!view->IsBufferView());
931
932 const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source,
933 texture.tic.z_source, texture.tic.w_source);
934 const auto sampler = sampler_cache.GetSampler(texture.tsc);
935 update_descriptor_queue.AddSampledImage(sampler, image_view);
936
937 const auto image_layout = update_descriptor_queue.GetLastImageLayout();
938 *image_layout = vk::ImageLayout::eShaderReadOnlyOptimal;
939 sampled_views.push_back(ImageView{std::move(view), image_layout});
940}
941
942void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) {
943 auto view = texture_cache.GetImageSurface(tic, entry);
944
945 if (entry.IsWritten()) {
946 view->MarkAsModified(texture_cache.Tick());
947 }
948
949 UNIMPLEMENTED_IF(tic.IsBuffer());
950
951 const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
952 update_descriptor_queue.AddImage(image_view);
953
954 const auto image_layout = update_descriptor_queue.GetLastImageLayout();
955 *image_layout = vk::ImageLayout::eGeneral;
956 image_views.push_back(ImageView{std::move(view), image_layout});
957}
958
959void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu) {
960 if (!gpu.dirty.viewport_transform && scheduler.TouchViewports()) {
961 return;
962 }
963 gpu.dirty.viewport_transform = false;
964 const auto& regs = gpu.regs;
965 const std::array viewports{
966 GetViewportState(device, regs, 0), GetViewportState(device, regs, 1),
967 GetViewportState(device, regs, 2), GetViewportState(device, regs, 3),
968 GetViewportState(device, regs, 4), GetViewportState(device, regs, 5),
969 GetViewportState(device, regs, 6), GetViewportState(device, regs, 7),
970 GetViewportState(device, regs, 8), GetViewportState(device, regs, 9),
971 GetViewportState(device, regs, 10), GetViewportState(device, regs, 11),
972 GetViewportState(device, regs, 12), GetViewportState(device, regs, 13),
973 GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)};
974 scheduler.Record([viewports](auto cmdbuf, auto& dld) {
975 cmdbuf.setViewport(0, static_cast<u32>(viewports.size()), viewports.data(), dld);
976 });
977}
978
979void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu) {
980 if (!gpu.dirty.scissor_test && scheduler.TouchScissors()) {
981 return;
982 }
983 gpu.dirty.scissor_test = false;
984 const auto& regs = gpu.regs;
985 const std::array scissors = {
986 GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2),
987 GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5),
988 GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8),
989 GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11),
990 GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14),
991 GetScissorState(regs, 15)};
992 scheduler.Record([scissors](auto cmdbuf, auto& dld) {
993 cmdbuf.setScissor(0, static_cast<u32>(scissors.size()), scissors.data(), dld);
994 });
995}
996
997void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu) {
998 if (!gpu.dirty.polygon_offset && scheduler.TouchDepthBias()) {
999 return;
1000 }
1001 gpu.dirty.polygon_offset = false;
1002 const auto& regs = gpu.regs;
1003 scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp,
1004 factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) {
1005 cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld);
1006 });
1007}
1008
1009void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu) {
1010 if (!gpu.dirty.blend_state && scheduler.TouchBlendConstants()) {
1011 return;
1012 }
1013 gpu.dirty.blend_state = false;
1014 const std::array blend_color = {gpu.regs.blend_color.r, gpu.regs.blend_color.g,
1015 gpu.regs.blend_color.b, gpu.regs.blend_color.a};
1016 scheduler.Record([blend_color](auto cmdbuf, auto& dld) {
1017 cmdbuf.setBlendConstants(blend_color.data(), dld);
1018 });
1019}
1020
1021void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu) {
1022 if (!gpu.dirty.depth_bounds_values && scheduler.TouchDepthBounds()) {
1023 return;
1024 }
1025 gpu.dirty.depth_bounds_values = false;
1026 const auto& regs = gpu.regs;
1027 scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]](
1028 auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); });
1029}
1030
1031void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu) {
1032 if (!gpu.dirty.stencil_test && scheduler.TouchStencilValues()) {
1033 return;
1034 }
1035 gpu.dirty.stencil_test = false;
1036 const auto& regs = gpu.regs;
1037 if (regs.stencil_two_side_enable) {
1038 // Separate values per face
1039 scheduler.Record(
1040 [front_ref = regs.stencil_front_func_ref, front_write_mask = regs.stencil_front_mask,
1041 front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_func_ref,
1042 back_write_mask = regs.stencil_back_mask,
1043 back_test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) {
1044 // Front face
1045 cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFront, front_ref, dld);
1046 cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFront, front_write_mask, dld);
1047 cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFront, front_test_mask, dld);
1048
1049 // Back face
1050 cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eBack, back_ref, dld);
1051 cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eBack, back_write_mask, dld);
1052 cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eBack, back_test_mask, dld);
1053 });
1054 } else {
1055 // Front face defines both faces
1056 scheduler.Record([ref = regs.stencil_front_func_ref, write_mask = regs.stencil_front_mask,
1057 test_mask = regs.stencil_front_func_mask](auto cmdbuf, auto& dld) {
1058 cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, ref, dld);
1059 cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, write_mask, dld);
1060 cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, test_mask, dld);
1061 });
1062 }
1063}
1064
1065std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const {
1066 std::size_t size = CalculateVertexArraysSize();
1067 if (is_indexed) {
1068 size = Common::AlignUp(size, 4) + CalculateIndexBufferSize();
1069 }
1070 size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment());
1071 return size;
1072}
1073
1074std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const {
1075 return Tegra::Engines::KeplerCompute::NumConstBuffers *
1076 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
1077}
1078
1079std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
1080 const auto& regs = system.GPU().Maxwell3D().regs;
1081
1082 std::size_t size = 0;
1083 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
1084 // This implementation assumes that all attributes are used in the shader.
1085 const GPUVAddr start{regs.vertex_array[index].StartAddress()};
1086 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
1087 DEBUG_ASSERT(end > start);
1088
1089 size += (end - start + 1) * regs.vertex_array[index].enable;
1090 }
1091 return size;
1092}
1093
1094std::size_t RasterizerVulkan::CalculateIndexBufferSize() const {
1095 const auto& regs = system.GPU().Maxwell3D().regs;
1096 return static_cast<std::size_t>(regs.index_array.count) *
1097 static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
1098}
1099
1100std::size_t RasterizerVulkan::CalculateConstBufferSize(
1101 const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const {
1102 if (entry.IsIndirect()) {
1103 // Buffer is accessed indirectly, so upload the entire thing
1104 return buffer.size;
1105 } else {
1106 // Buffer is accessed directly, upload just what we use
1107 return entry.GetSize();
1108 }
1109}
1110
1111RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const {
1112 using namespace VideoCore::Surface;
1113
1114 const auto& regs = system.GPU().Maxwell3D().regs;
1115 RenderPassParams renderpass_params;
1116
1117 for (std::size_t rt = 0; rt < static_cast<std::size_t>(regs.rt_control.count); ++rt) {
1118 const auto& rendertarget = regs.rt[rt];
1119 if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE)
1120 continue;
1121 renderpass_params.color_attachments.push_back(RenderPassParams::ColorAttachment{
1122 static_cast<u32>(rt), PixelFormatFromRenderTargetFormat(rendertarget.format),
1123 texceptions.test(rt)});
1124 }
1125
1126 renderpass_params.has_zeta = regs.zeta_enable;
1127 if (renderpass_params.has_zeta) {
1128 renderpass_params.zeta_pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
1129 renderpass_params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX];
1130 }
1131
1132 return renderpass_params;
1133}
1134
1135} // namespace Vulkan
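[Editorial aside, not part of the commit] FlushWork() above batches command recording: recorded work is handed to the scheduler's worker thread on every 8th draw, and the whole command buffer is flushed to the GPU once roughly 4096 draws have accumulated. Below is a minimal, self-contained C++ sketch of that counter logic using a hypothetical DummyScheduler stand-in; the thresholds 8 and 4096 are taken from the code above, everything else is illustrative only.

// Sketch of the FlushWork() batching heuristic against a dummy scheduler.
#include <cstdint>
#include <iostream>

struct DummyScheduler {
    void DispatchWork() { std::cout << "dispatch recorded work to the worker thread\n"; }
    void Flush() { std::cout << "flush the command buffer to the GPU\n"; }
};

int main() {
    DummyScheduler scheduler;
    std::uint32_t draw_counter = 0;
    for (int draw = 0; draw < 10000; ++draw) {
        // Only act on every 8th draw; after the increment this is the same test
        // as (draw_counter % 8 == 7).
        if ((++draw_counter & 7) != 7) {
            continue;
        }
        if (draw_counter < 4096) {
            scheduler.DispatchWork(); // cheap: hand work to the worker thread
        } else {
            scheduler.Flush(); // expensive: submit to the GPU and restart the count
            draw_counter = 0;
        }
    }
    return 0;
}

The bitmask test keeps the per-draw hot path down to a single increment and comparison.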
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index fc324952b..2ecc19e7a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -4,10 +4,259 @@
4
5#pragma once
6
7#include <array>
8#include <bitset>
9#include <memory>
10#include <utility>
11#include <vector>
12
13#include <boost/container/static_vector.hpp>
14
15#include "common/common_types.h"
16#include "video_core/memory_manager.h"
17#include "video_core/rasterizer_accelerated.h"
18#include "video_core/rasterizer_interface.h"
19#include "video_core/renderer_vulkan/declarations.h"
20#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
21#include "video_core/renderer_vulkan/vk_buffer_cache.h"
22#include "video_core/renderer_vulkan/vk_compute_pass.h"
23#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
24#include "video_core/renderer_vulkan/vk_memory_manager.h"
25#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
26#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
27#include "video_core/renderer_vulkan/vk_resource_manager.h"
28#include "video_core/renderer_vulkan/vk_sampler_cache.h"
29#include "video_core/renderer_vulkan/vk_scheduler.h"
30#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
31#include "video_core/renderer_vulkan/vk_texture_cache.h"
32#include "video_core/renderer_vulkan/vk_update_descriptor.h"
33
34namespace Core {
35class System;
36}
37
38namespace Core::Frontend {
39class EmuWindow;
40}
41
42namespace Tegra::Engines {
43class Maxwell3D;
44}
45
46namespace Vulkan {
47
48struct VKScreenInfo;
49
50using ImageViewsPack =
51 boost::container::static_vector<vk::ImageView, Maxwell::NumRenderTargets + 1>;
52
53struct FramebufferCacheKey {
54 vk::RenderPass renderpass;
55 ImageViewsPack views;
56 u32 width;
57 u32 height;
58
59 std::size_t Hash() const noexcept {
60 std::size_t hash = 0;
61 boost::hash_combine(hash, static_cast<VkRenderPass>(renderpass));
62 for (const auto& view : views) {
63 boost::hash_combine(hash, static_cast<VkImageView>(view));
64 }
65 boost::hash_combine(hash, width);
66 boost::hash_combine(hash, height);
67 return hash;
68 }
69
70 bool operator==(const FramebufferCacheKey& rhs) const noexcept {
71 return std::tie(renderpass, views, width, height) ==
72 std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height);
73 }
74};
75
76} // namespace Vulkan
77
78namespace std {
79
80template <>
81struct hash<Vulkan::FramebufferCacheKey> {
82 std::size_t operator()(const Vulkan::FramebufferCacheKey& k) const noexcept {
83 return k.Hash();
84 }
85};
86
87} // namespace std
88
89namespace Vulkan {
90
-class RasterizerVulkan : public VideoCore::RasterizerInterface {};
91class BufferBindings;
92
93struct ImageView {
94 View view;
95 vk::ImageLayout* layout = nullptr;
96};
97
98class RasterizerVulkan : public VideoCore::RasterizerAccelerated {
99public:
100 explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
101 VKScreenInfo& screen_info, const VKDevice& device,
102 VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
103 VKScheduler& scheduler);
104 ~RasterizerVulkan() override;
105
106 bool DrawBatch(bool is_indexed) override;
107 bool DrawMultiBatch(bool is_indexed) override;
108 void Clear() override;
109 void DispatchCompute(GPUVAddr code_addr) override;
110 void FlushAll() override;
111 void FlushRegion(CacheAddr addr, u64 size) override;
112 void InvalidateRegion(CacheAddr addr, u64 size) override;
113 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
114 void FlushCommands() override;
115 void TickFrame() override;
116 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
117 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
118 const Tegra::Engines::Fermi2D::Config& copy_config) override;
119 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
120 u32 pixel_stride) override;
121
122 /// Maximum supported size that a constbuffer can have in bytes.
123 static constexpr std::size_t MaxConstbufferSize = 0x10000;
124 static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
125 "The maximum size of a constbuffer must be a multiple of the size of a vec4");
126
127private:
128 struct DrawParameters {
129 void Draw(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld) const;
130
131 u32 base_instance = 0;
132 u32 num_instances = 0;
133 u32 base_vertex = 0;
134 u32 num_vertices = 0;
135 bool is_indexed = false;
136 };
137
138 using Texceptions = std::bitset<Maxwell::NumRenderTargets + 1>;
139
140 static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8;
141
142 void Draw(bool is_indexed, bool is_instanced);
143
144 void FlushWork();
145
146 Texceptions UpdateAttachments();
147
148 std::tuple<vk::Framebuffer, vk::Extent2D> ConfigureFramebuffers(vk::RenderPass renderpass);
149
150 /// Sets up geometry buffers and state.
151 DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
152 bool is_indexed, bool is_instanced);
153
154 /// Sets up descriptors in the graphics pipeline.
155 void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders);
156
157 void SetupImageTransitions(Texceptions texceptions,
158 const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
159 const View& zeta_attachment);
160
161 void UpdateDynamicStates();
162
163 bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
164
165 void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
166 BufferBindings& buffer_bindings);
167
168 void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
169
170 /// Sets up constant buffers in the graphics pipeline.
171 void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage);
172
173 /// Sets up global buffers in the graphics pipeline.
174 void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
175
176 /// Sets up texel buffers in the graphics pipeline.
177 void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage);
178
179 /// Sets up textures in the graphics pipeline.
180 void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
181
182 /// Sets up images in the graphics pipeline.
183 void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
184
185 /// Sets up constant buffers in the compute pipeline.
186 void SetupComputeConstBuffers(const ShaderEntries& entries);
187
188 /// Sets up global buffers in the compute pipeline.
189 void SetupComputeGlobalBuffers(const ShaderEntries& entries);
190
191 /// Sets up texel buffers in the compute pipeline.
192 void SetupComputeTexelBuffers(const ShaderEntries& entries);
193
194 /// Sets up textures in the compute pipeline.
195 void SetupComputeTextures(const ShaderEntries& entries);
196
197 /// Sets up images in the compute pipeline.
198 void SetupComputeImages(const ShaderEntries& entries);
199
200 void SetupConstBuffer(const ConstBufferEntry& entry,
201 const Tegra::Engines::ConstBufferInfo& buffer);
202
203 void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
204
205 void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry);
206
207 void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry);
208
209 void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
210
211 void UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu);
212 void UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu);
213 void UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu);
214 void UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu);
215 void UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu);
216 void UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu);
217
218 std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
219
220 std::size_t CalculateComputeStreamBufferSize() const;
221
222 std::size_t CalculateVertexArraysSize() const;
223
224 std::size_t CalculateIndexBufferSize() const;
225
226 std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
227 const Tegra::Engines::ConstBufferInfo& buffer) const;
228
229 RenderPassParams GetRenderPassParams(Texceptions texceptions) const;
230
231 Core::System& system;
232 Core::Frontend::EmuWindow& render_window;
233 VKScreenInfo& screen_info;
234 const VKDevice& device;
235 VKResourceManager& resource_manager;
236 VKMemoryManager& memory_manager;
237 VKScheduler& scheduler;
238
239 VKStagingBufferPool staging_pool;
240 VKDescriptorPool descriptor_pool;
241 VKUpdateDescriptorQueue update_descriptor_queue;
242 QuadArrayPass quad_array_pass;
243 Uint8Pass uint8_pass;
244
245 VKTextureCache texture_cache;
246 VKPipelineCache pipeline_cache;
247 VKBufferCache buffer_cache;
248 VKSamplerCache sampler_cache;
249
250 std::array<View, Maxwell::NumRenderTargets> color_attachments;
251 View zeta_attachment;
252
253 std::vector<ImageView> sampled_views;
254 std::vector<ImageView> image_views;
255
256 u32 draw_counter = 0;
257
258 // TODO(Rodrigo): Invalidate on image destruction
259 std::unordered_map<FramebufferCacheKey, UniqueFramebuffer> framebuffer_cache;
260};
261
262} // namespace Vulkan
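[Editorial aside, not part of the commit] The framebuffer_cache at the end of the header pairs a hashable key struct with std::unordered_map::try_emplace so that a framebuffer is only created on a cache miss (see ConfigureFramebuffers() in the .cpp hunk). Below is a simplified, self-contained C++ sketch of that pattern; the Key struct, the plain integers standing in for the Vulkan handles, and the hand-rolled hash combine (in place of boost::hash_combine) are all illustrative assumptions, not the commit's actual types.

// Sketch of the FramebufferCacheKey / framebuffer_cache pattern.
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iostream>
#include <tuple>
#include <unordered_map>

struct Key {
    std::uint64_t renderpass = 0; // stands in for vk::RenderPass
    std::uint32_t width = 0;
    std::uint32_t height = 0;

    std::size_t Hash() const noexcept {
        std::size_t seed = std::hash<std::uint64_t>{}(renderpass);
        const auto combine = [&seed](std::size_t value) {
            // Same recipe boost::hash_combine uses internally.
            seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2);
        };
        combine(std::hash<std::uint32_t>{}(width));
        combine(std::hash<std::uint32_t>{}(height));
        return seed;
    }

    bool operator==(const Key& rhs) const noexcept {
        return std::tie(renderpass, width, height) ==
               std::tie(rhs.renderpass, rhs.width, rhs.height);
    }
};

namespace std {
template <>
struct hash<Key> {
    std::size_t operator()(const Key& key) const noexcept {
        return key.Hash();
    }
};
} // namespace std

int main() {
    std::unordered_map<Key, int> cache; // int stands in for UniqueFramebuffer
    const Key key{1, 1280, 720};

    // try_emplace only constructs the mapped value on a miss, so an existing
    // entry is reused whenever the same key is requested again.
    const auto [entry, is_cache_miss] = cache.try_emplace(key);
    if (is_cache_miss) {
        entry->second = 42; // create the "framebuffer" only once
    }
    std::cout << "cache miss: " << is_cache_miss << ", entries: " << cache.size() << '\n';
    return 0;
}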