| Field | Value |
|---|---|
| author | 2020-01-16 22:55:23 -0300 |
| committer | 2020-01-16 23:05:15 -0300 |
| commit | fe5356d22363081808db1ba2a84ef7bdf979dad6 (patch) |
| tree | e647f4a8dca7908f5873b1876cd0821141fee8ae /src |
| parent | renderer_vulkan: Add header as placeholder (diff) |
vk_rasterizer: Implement Vulkan's rasterizer
This abstraction is the Vulkan equivalent of OpenGL's rasterizer. It ties
together all parts of the backend and issues rendering work on demand.
Diffstat (limited to 'src')

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | src/video_core/CMakeLists.txt | 1 |
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 1135 |
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.h | 251 |

3 files changed, 1386 insertions, 1 deletion
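For orientation, the following is a minimal sketch of the per-draw control flow that the new rasterizer implements, based on RasterizerVulkan::Draw() in the diff below. The Stub* types and DrawSketch are hypothetical stand-ins invented for illustration, not yuzu classes; only the ordering of the steps mirrors the actual code.

```cpp
// Sketch of the ordering of RasterizerVulkan::Draw() as implemented in the diff below.
// StubBufferCache and StubScheduler are hypothetical stand-ins, not real yuzu types.
#include <cstdio>

struct StubBufferCache {
    void Map() { std::puts("map stream buffer (vertices, indices, uniforms)"); }
    void Unmap() { std::puts("unmap stream buffer"); }
};

struct StubScheduler {
    void BindGraphicsPipeline() { std::puts("bind graphics pipeline"); }
    void RequestRenderpass() { std::puts("begin or reuse renderpass"); }
    void Record() { std::puts("record descriptor binding + draw call"); }
};

// Mirrors Draw(is_indexed, is_instanced): build a pipeline key from the fixed state and
// shader addresses, upload data through the stream buffer, then record the draw.
void DrawSketch(StubBufferCache& buffer_cache, StubScheduler& scheduler) {
    std::puts("flush work heuristic (FlushWork)");
    buffer_cache.Map();
    std::puts("setup geometry: vertex arrays + index buffer -> DrawParameters");
    std::puts("setup shader descriptors: const/global buffers, textures, images");
    buffer_cache.Unmap();
    std::puts("update attachments and image layout transitions");
    scheduler.BindGraphicsPipeline();
    scheduler.RequestRenderpass();
    std::puts("update dynamic state: viewports, scissors, depth bias, ...");
    scheduler.Record();
}

int main() {
    StubBufferCache buffer_cache;
    StubScheduler scheduler;
    DrawSketch(buffer_cache, scheduler);
    return 0;
}
```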
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index d7719eed9..12c46e86f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -172,6 +172,7 @@ if (ENABLE_VULKAN)
 renderer_vulkan/vk_memory_manager.h
 renderer_vulkan/vk_pipeline_cache.cpp
 renderer_vulkan/vk_pipeline_cache.h
+renderer_vulkan/vk_rasterizer.cpp
 renderer_vulkan/vk_rasterizer.h
 renderer_vulkan/vk_renderpass_cache.cpp
 renderer_vulkan/vk_renderpass_cache.h
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
new file mode 100644
index 000000000..23252e658
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -0,0 +1,1135 @@
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <memory> | ||
| 8 | #include <mutex> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include <boost/container/static_vector.hpp> | ||
| 12 | #include <boost/functional/hash.hpp> | ||
| 13 | |||
| 14 | #include "common/alignment.h" | ||
| 15 | #include "common/assert.h" | ||
| 16 | #include "common/logging/log.h" | ||
| 17 | #include "common/microprofile.h" | ||
| 18 | #include "core/core.h" | ||
| 19 | #include "core/memory.h" | ||
| 20 | #include "video_core/engines/kepler_compute.h" | ||
| 21 | #include "video_core/engines/maxwell_3d.h" | ||
| 22 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 23 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | ||
| 24 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 25 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | ||
| 26 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 27 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||
| 28 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | ||
| 29 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||
| 30 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 31 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||
| 32 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||
| 33 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | ||
| 34 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 35 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||
| 36 | #include "video_core/renderer_vulkan/vk_sampler_cache.h" | ||
| 37 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 38 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||
| 39 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 40 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 41 | |||
| 42 | namespace Vulkan { | ||
| 43 | |||
| 44 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 45 | |||
| 46 | MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192)); | ||
| 47 | MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); | ||
| 48 | MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128)); | ||
| 49 | MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128)); | ||
| 50 | MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128)); | ||
| 51 | MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128)); | ||
| 52 | MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128)); | ||
| 53 | MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128)); | ||
| 54 | MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128)); | ||
| 55 | MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128)); | ||
| 56 | MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128)); | ||
| 57 | |||
| 58 | namespace { | ||
| 59 | |||
| 60 | constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::ShaderType::Compute); | ||
| 61 | |||
| 62 | vk::Viewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) { | ||
| 63 | const auto& viewport = regs.viewport_transform[index]; | ||
| 64 | const float x = viewport.translate_x - viewport.scale_x; | ||
| 65 | const float y = viewport.translate_y - viewport.scale_y; | ||
| 66 | const float width = viewport.scale_x * 2.0f; | ||
| 67 | const float height = viewport.scale_y * 2.0f; | ||
| 68 | |||
| 69 | const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; | ||
| 70 | float near = viewport.translate_z - viewport.scale_z * reduce_z; | ||
| 71 | float far = viewport.translate_z + viewport.scale_z; | ||
| 72 | if (!device.IsExtDepthRangeUnrestrictedSupported()) { | ||
| 73 | near = std::clamp(near, 0.0f, 1.0f); | ||
| 74 | far = std::clamp(far, 0.0f, 1.0f); | ||
| 75 | } | ||
| 76 | |||
| 77 | return vk::Viewport(x, y, width != 0 ? width : 1.0f, height != 0 ? height : 1.0f, near, far); | ||
| 78 | } | ||
| 79 | |||
| 80 | constexpr vk::Rect2D GetScissorState(const Maxwell& regs, std::size_t index) { | ||
| 81 | const auto& scissor = regs.scissor_test[index]; | ||
| 82 | if (!scissor.enable) { | ||
| 83 | return {{0, 0}, {INT32_MAX, INT32_MAX}}; | ||
| 84 | } | ||
| 85 | const u32 width = scissor.max_x - scissor.min_x; | ||
| 86 | const u32 height = scissor.max_y - scissor.min_y; | ||
| 87 | return {{static_cast<s32>(scissor.min_x), static_cast<s32>(scissor.min_y)}, {width, height}}; | ||
| 88 | } | ||
| 89 | |||
| 90 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( | ||
| 91 | const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) { | ||
| 92 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; | ||
| 93 | for (std::size_t i = 0; i < std::size(addresses); ++i) { | ||
| 94 | addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; | ||
| 95 | } | ||
| 96 | return addresses; | ||
| 97 | } | ||
| 98 | |||
| 99 | void TransitionImages(const std::vector<ImageView>& views, vk::PipelineStageFlags pipeline_stage, | ||
| 100 | vk::AccessFlags access) { | ||
| 101 | for (auto& [view, layout] : views) { | ||
| 102 | view->Transition(*layout, pipeline_stage, access); | ||
| 103 | } | ||
| 104 | } | ||
| 105 | |||
| 106 | template <typename Engine, typename Entry> | ||
| 107 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, | ||
| 108 | std::size_t stage) { | ||
| 109 | const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); | ||
| 110 | if (entry.IsBindless()) { | ||
| 111 | const Tegra::Texture::TextureHandle tex_handle = | ||
| 112 | engine.AccessConstBuffer32(stage_type, entry.GetBuffer(), entry.GetOffset()); | ||
| 113 | return engine.GetTextureInfo(tex_handle); | ||
| 114 | } | ||
| 115 | if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { | ||
| 116 | return engine.GetStageTexture(stage_type, entry.GetOffset()); | ||
| 117 | } else { | ||
| 118 | return engine.GetTexture(entry.GetOffset()); | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | } // Anonymous namespace | ||
| 123 | |||
| 124 | class BufferBindings final { | ||
| 125 | public: | ||
| 126 | void AddVertexBinding(const vk::Buffer* buffer, vk::DeviceSize offset) { | ||
| 127 | vertex.buffer_ptrs[vertex.num_buffers] = buffer; | ||
| 128 | vertex.offsets[vertex.num_buffers] = offset; | ||
| 129 | ++vertex.num_buffers; | ||
| 130 | } | ||
| 131 | |||
| 132 | void SetIndexBinding(const vk::Buffer* buffer, vk::DeviceSize offset, vk::IndexType type) { | ||
| 133 | index.buffer = buffer; | ||
| 134 | index.offset = offset; | ||
| 135 | index.type = type; | ||
| 136 | } | ||
| 137 | |||
| 138 | void Bind(VKScheduler& scheduler) const { | ||
| 139 | // Use this large switch case to avoid capturing more memory in the record lambda than | ||
| 140 | // we actually need. It looks horrible, but it's the best we can do in standard C++. | ||
| 141 | switch (vertex.num_buffers) { | ||
| 142 | case 0: | ||
| 143 | return BindStatic<0>(scheduler); | ||
| 144 | case 1: | ||
| 145 | return BindStatic<1>(scheduler); | ||
| 146 | case 2: | ||
| 147 | return BindStatic<2>(scheduler); | ||
| 148 | case 3: | ||
| 149 | return BindStatic<3>(scheduler); | ||
| 150 | case 4: | ||
| 151 | return BindStatic<4>(scheduler); | ||
| 152 | case 5: | ||
| 153 | return BindStatic<5>(scheduler); | ||
| 154 | case 6: | ||
| 155 | return BindStatic<6>(scheduler); | ||
| 156 | case 7: | ||
| 157 | return BindStatic<7>(scheduler); | ||
| 158 | case 8: | ||
| 159 | return BindStatic<8>(scheduler); | ||
| 160 | case 9: | ||
| 161 | return BindStatic<9>(scheduler); | ||
| 162 | case 10: | ||
| 163 | return BindStatic<10>(scheduler); | ||
| 164 | case 11: | ||
| 165 | return BindStatic<11>(scheduler); | ||
| 166 | case 12: | ||
| 167 | return BindStatic<12>(scheduler); | ||
| 168 | case 13: | ||
| 169 | return BindStatic<13>(scheduler); | ||
| 170 | case 14: | ||
| 171 | return BindStatic<14>(scheduler); | ||
| 172 | case 15: | ||
| 173 | return BindStatic<15>(scheduler); | ||
| 174 | case 16: | ||
| 175 | return BindStatic<16>(scheduler); | ||
| 176 | case 17: | ||
| 177 | return BindStatic<17>(scheduler); | ||
| 178 | case 18: | ||
| 179 | return BindStatic<18>(scheduler); | ||
| 180 | case 19: | ||
| 181 | return BindStatic<19>(scheduler); | ||
| 182 | case 20: | ||
| 183 | return BindStatic<20>(scheduler); | ||
| 184 | case 21: | ||
| 185 | return BindStatic<21>(scheduler); | ||
| 186 | case 22: | ||
| 187 | return BindStatic<22>(scheduler); | ||
| 188 | case 23: | ||
| 189 | return BindStatic<23>(scheduler); | ||
| 190 | case 24: | ||
| 191 | return BindStatic<24>(scheduler); | ||
| 192 | case 25: | ||
| 193 | return BindStatic<25>(scheduler); | ||
| 194 | case 26: | ||
| 195 | return BindStatic<26>(scheduler); | ||
| 196 | case 27: | ||
| 197 | return BindStatic<27>(scheduler); | ||
| 198 | case 28: | ||
| 199 | return BindStatic<28>(scheduler); | ||
| 200 | case 29: | ||
| 201 | return BindStatic<29>(scheduler); | ||
| 202 | case 30: | ||
| 203 | return BindStatic<30>(scheduler); | ||
| 204 | case 31: | ||
| 205 | return BindStatic<31>(scheduler); | ||
| 206 | } | ||
| 207 | UNREACHABLE(); | ||
| 208 | } | ||
| 209 | |||
| 210 | private: | ||
| 211 | // Some of these fields are intentionally left uninitialized to avoid initializing them twice. | ||
| 212 | struct { | ||
| 213 | std::size_t num_buffers = 0; | ||
| 214 | std::array<const vk::Buffer*, Maxwell::NumVertexArrays> buffer_ptrs; | ||
| 215 | std::array<vk::DeviceSize, Maxwell::NumVertexArrays> offsets; | ||
| 216 | } vertex; | ||
| 217 | |||
| 218 | struct { | ||
| 219 | const vk::Buffer* buffer = nullptr; | ||
| 220 | vk::DeviceSize offset; | ||
| 221 | vk::IndexType type; | ||
| 222 | } index; | ||
| 223 | |||
| 224 | template <std::size_t N> | ||
| 225 | void BindStatic(VKScheduler& scheduler) const { | ||
| 226 | if (index.buffer != nullptr) { | ||
| 227 | BindStatic<N, true>(scheduler); | ||
| 228 | } else { | ||
| 229 | BindStatic<N, false>(scheduler); | ||
| 230 | } | ||
| 231 | } | ||
| 232 | |||
| 233 | template <std::size_t N, bool is_indexed> | ||
| 234 | void BindStatic(VKScheduler& scheduler) const { | ||
| 235 | static_assert(N <= Maxwell::NumVertexArrays); | ||
| 236 | if constexpr (N == 0) { | ||
| 237 | return; | ||
| 238 | } | ||
| 239 | |||
| 240 | std::array<vk::Buffer, N> buffers; | ||
| 241 | std::transform(vertex.buffer_ptrs.begin(), vertex.buffer_ptrs.begin() + N, buffers.begin(), | ||
| 242 | [](const auto ptr) { return *ptr; }); | ||
| 243 | |||
| 244 | std::array<vk::DeviceSize, N> offsets; | ||
| 245 | std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin()); | ||
| 246 | |||
| 247 | if constexpr (is_indexed) { | ||
| 248 | // Indexed draw | ||
| 249 | scheduler.Record([buffers, offsets, index_buffer = *index.buffer, | ||
| 250 | index_offset = index.offset, | ||
| 251 | index_type = index.type](auto cmdbuf, auto& dld) { | ||
| 252 | cmdbuf.bindIndexBuffer(index_buffer, index_offset, index_type, dld); | ||
| 253 | cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(), | ||
| 254 | dld); | ||
| 255 | }); | ||
| 256 | } else { | ||
| 257 | // Array draw | ||
| 258 | scheduler.Record([buffers, offsets](auto cmdbuf, auto& dld) { | ||
| 259 | cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(), | ||
| 260 | dld); | ||
| 261 | }); | ||
| 262 | } | ||
| 263 | } | ||
| 264 | }; | ||
| 265 | |||
| 266 | void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf, | ||
| 267 | const vk::DispatchLoaderDynamic& dld) const { | ||
| 268 | if (is_indexed) { | ||
| 269 | cmdbuf.drawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance, dld); | ||
| 270 | } else { | ||
| 271 | cmdbuf.draw(num_vertices, num_instances, base_vertex, base_instance, dld); | ||
| 272 | } | ||
| 273 | } | ||
| 274 | |||
| 275 | RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer, | ||
| 276 | VKScreenInfo& screen_info, const VKDevice& device, | ||
| 277 | VKResourceManager& resource_manager, | ||
| 278 | VKMemoryManager& memory_manager, VKScheduler& scheduler) | ||
| 279 | : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer}, | ||
| 280 | screen_info{screen_info}, device{device}, resource_manager{resource_manager}, | ||
| 281 | memory_manager{memory_manager}, scheduler{scheduler}, | ||
| 282 | staging_pool(device, memory_manager, scheduler), descriptor_pool(device), | ||
| 283 | update_descriptor_queue(device, scheduler), | ||
| 284 | quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||
| 285 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||
| 286 | texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, | ||
| 287 | staging_pool), | ||
| 288 | pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), | ||
| 289 | buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), | ||
| 290 | sampler_cache(device) {} | ||
| 291 | |||
| 292 | RasterizerVulkan::~RasterizerVulkan() = default; | ||
| 293 | |||
| 294 | bool RasterizerVulkan::DrawBatch(bool is_indexed) { | ||
| 295 | Draw(is_indexed, false); | ||
| 296 | return true; | ||
| 297 | } | ||
| 298 | |||
| 299 | bool RasterizerVulkan::DrawMultiBatch(bool is_indexed) { | ||
| 300 | Draw(is_indexed, true); | ||
| 301 | return true; | ||
| 302 | } | ||
| 303 | |||
| 304 | void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | ||
| 305 | MICROPROFILE_SCOPE(Vulkan_Drawing); | ||
| 306 | |||
| 307 | FlushWork(); | ||
| 308 | |||
| 309 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 310 | GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)}; | ||
| 311 | |||
| 312 | buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed)); | ||
| 313 | |||
| 314 | BufferBindings buffer_bindings; | ||
| 315 | const DrawParameters draw_params = | ||
| 316 | SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced); | ||
| 317 | |||
| 318 | update_descriptor_queue.Acquire(); | ||
| 319 | sampled_views.clear(); | ||
| 320 | image_views.clear(); | ||
| 321 | |||
| 322 | const auto shaders = pipeline_cache.GetShaders(); | ||
| 323 | key.shaders = GetShaderAddresses(shaders); | ||
| 324 | SetupShaderDescriptors(shaders); | ||
| 325 | |||
| 326 | buffer_cache.Unmap(); | ||
| 327 | |||
| 328 | const auto texceptions = UpdateAttachments(); | ||
| 329 | SetupImageTransitions(texceptions, color_attachments, zeta_attachment); | ||
| 330 | |||
| 331 | key.renderpass_params = GetRenderPassParams(texceptions); | ||
| 332 | |||
| 333 | auto& pipeline = pipeline_cache.GetGraphicsPipeline(key); | ||
| 334 | scheduler.BindGraphicsPipeline(pipeline.GetHandle()); | ||
| 335 | |||
| 336 | const auto renderpass = pipeline.GetRenderPass(); | ||
| 337 | const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); | ||
| 338 | scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr}); | ||
| 339 | |||
| 340 | UpdateDynamicStates(); | ||
| 341 | |||
| 342 | buffer_bindings.Bind(scheduler); | ||
| 343 | |||
| 344 | if (device.IsNvDeviceDiagnosticCheckpoints()) { | ||
| 345 | scheduler.Record( | ||
| 346 | [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); | ||
| 347 | } | ||
| 348 | |||
| 349 | const auto pipeline_layout = pipeline.GetLayout(); | ||
| 350 | const auto descriptor_set = pipeline.CommitDescriptorSet(); | ||
| 351 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { | ||
| 352 | if (descriptor_set) { | ||
| 353 | cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipeline_layout, | ||
| 354 | DESCRIPTOR_SET, 1, &descriptor_set, 0, nullptr, dld); | ||
| 355 | } | ||
| 356 | draw_params.Draw(cmdbuf, dld); | ||
| 357 | }); | ||
| 358 | } | ||
| 359 | |||
| 360 | void RasterizerVulkan::Clear() { | ||
| 361 | MICROPROFILE_SCOPE(Vulkan_Clearing); | ||
| 362 | |||
| 363 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 364 | if (!system.GPU().Maxwell3D().ShouldExecute()) { | ||
| 365 | return; | ||
| 366 | } | ||
| 367 | |||
| 368 | const auto& regs = gpu.regs; | ||
| 369 | const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || | ||
| 370 | regs.clear_buffers.A; | ||
| 371 | const bool use_depth = regs.clear_buffers.Z; | ||
| 372 | const bool use_stencil = regs.clear_buffers.S; | ||
| 373 | if (!use_color && !use_depth && !use_stencil) { | ||
| 374 | return; | ||
| 375 | } | ||
| 376 | // Clearing images requires being outside of a renderpass | ||
| 377 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 378 | |||
| 379 | // TODO(Rodrigo): Implement clears by rendering a quad or by beginning a renderpass. | ||
| 380 | |||
| 381 | if (use_color) { | ||
| 382 | View color_view; | ||
| 383 | { | ||
| 384 | MICROPROFILE_SCOPE(Vulkan_RenderTargets); | ||
| 385 | color_view = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT.Value(), false); | ||
| 386 | } | ||
| 387 | |||
| 388 | color_view->Transition(vk::ImageLayout::eTransferDstOptimal, | ||
| 389 | vk::PipelineStageFlagBits::eTransfer, | ||
| 390 | vk::AccessFlagBits::eTransferWrite); | ||
| 391 | |||
| 392 | const std::array clear_color = {regs.clear_color[0], regs.clear_color[1], | ||
| 393 | regs.clear_color[2], regs.clear_color[3]}; | ||
| 394 | const vk::ClearColorValue clear(clear_color); | ||
| 395 | scheduler.Record([image = color_view->GetImage(), | ||
| 396 | subresource = color_view->GetImageSubresourceRange(), | ||
| 397 | clear](auto cmdbuf, auto& dld) { | ||
| 398 | cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear, subresource, | ||
| 399 | dld); | ||
| 400 | }); | ||
| 401 | } | ||
| 402 | if (use_depth || use_stencil) { | ||
| 403 | View zeta_surface; | ||
| 404 | { | ||
| 405 | MICROPROFILE_SCOPE(Vulkan_RenderTargets); | ||
| 406 | zeta_surface = texture_cache.GetDepthBufferSurface(false); | ||
| 407 | } | ||
| 408 | |||
| 409 | zeta_surface->Transition(vk::ImageLayout::eTransferDstOptimal, | ||
| 410 | vk::PipelineStageFlagBits::eTransfer, | ||
| 411 | vk::AccessFlagBits::eTransferWrite); | ||
| 412 | |||
| 413 | const vk::ClearDepthStencilValue clear(regs.clear_depth, | ||
| 414 | static_cast<u32>(regs.clear_stencil)); | ||
| 415 | scheduler.Record([image = zeta_surface->GetImage(), | ||
| 416 | subresource = zeta_surface->GetImageSubresourceRange(), | ||
| 417 | clear](auto cmdbuf, auto& dld) { | ||
| 418 | cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, clear, | ||
| 419 | subresource, dld); | ||
| 420 | }); | ||
| 421 | } | ||
| 422 | } | ||
| 423 | |||
| 424 | void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | ||
| 425 | MICROPROFILE_SCOPE(Vulkan_Compute); | ||
| 426 | update_descriptor_queue.Acquire(); | ||
| 427 | sampled_views.clear(); | ||
| 428 | image_views.clear(); | ||
| 429 | |||
| 430 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||
| 431 | const ComputePipelineCacheKey key{ | ||
| 432 | code_addr, | ||
| 433 | launch_desc.shared_alloc, | ||
| 434 | {launch_desc.block_dim_x, launch_desc.block_dim_y, launch_desc.block_dim_z}}; | ||
| 435 | auto& pipeline = pipeline_cache.GetComputePipeline(key); | ||
| 436 | |||
| 437 | // Compute dispatches can't be executed inside a renderpass | ||
| 438 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 439 | |||
| 440 | buffer_cache.Map(CalculateComputeStreamBufferSize()); | ||
| 441 | |||
| 442 | const auto& entries = pipeline.GetEntries(); | ||
| 443 | SetupComputeConstBuffers(entries); | ||
| 444 | SetupComputeGlobalBuffers(entries); | ||
| 445 | SetupComputeTexelBuffers(entries); | ||
| 446 | SetupComputeTextures(entries); | ||
| 447 | SetupComputeImages(entries); | ||
| 448 | |||
| 449 | buffer_cache.Unmap(); | ||
| 450 | |||
| 451 | TransitionImages(sampled_views, vk::PipelineStageFlagBits::eComputeShader, | ||
| 452 | vk::AccessFlagBits::eShaderRead); | ||
| 453 | TransitionImages(image_views, vk::PipelineStageFlagBits::eComputeShader, | ||
| 454 | vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite); | ||
| 455 | |||
| 456 | if (device.IsNvDeviceDiagnosticCheckpoints()) { | ||
| 457 | scheduler.Record( | ||
| 458 | [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(nullptr, dld); }); | ||
| 459 | } | ||
| 460 | |||
| 461 | scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, | ||
| 462 | grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(), | ||
| 463 | layout = pipeline.GetLayout(), | ||
| 464 | descriptor_set = pipeline.CommitDescriptorSet()](auto cmdbuf, auto& dld) { | ||
| 465 | cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline_handle, dld); | ||
| 466 | cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, DESCRIPTOR_SET, 1, | ||
| 467 | &descriptor_set, 0, nullptr, dld); | ||
| 468 | cmdbuf.dispatch(grid_x, grid_y, grid_z, dld); | ||
| 469 | }); | ||
| 470 | } | ||
| 471 | |||
| 472 | void RasterizerVulkan::FlushAll() {} | ||
| 473 | |||
| 474 | void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { | ||
| 475 | texture_cache.FlushRegion(addr, size); | ||
| 476 | buffer_cache.FlushRegion(addr, size); | ||
| 477 | } | ||
| 478 | |||
| 479 | void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { | ||
| 480 | texture_cache.InvalidateRegion(addr, size); | ||
| 481 | pipeline_cache.InvalidateRegion(addr, size); | ||
| 482 | buffer_cache.InvalidateRegion(addr, size); | ||
| 483 | } | ||
| 484 | |||
| 485 | void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | ||
| 486 | FlushRegion(addr, size); | ||
| 487 | InvalidateRegion(addr, size); | ||
| 488 | } | ||
| 489 | |||
| 490 | void RasterizerVulkan::FlushCommands() { | ||
| 491 | if (draw_counter > 0) { | ||
| 492 | draw_counter = 0; | ||
| 493 | scheduler.Flush(); | ||
| 494 | } | ||
| 495 | } | ||
| 496 | |||
| 497 | void RasterizerVulkan::TickFrame() { | ||
| 498 | draw_counter = 0; | ||
| 499 | update_descriptor_queue.TickFrame(); | ||
| 500 | buffer_cache.TickFrame(); | ||
| 501 | staging_pool.TickFrame(); | ||
| 502 | } | ||
| 503 | |||
| 504 | bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | ||
| 505 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | ||
| 506 | const Tegra::Engines::Fermi2D::Config& copy_config) { | ||
| 507 | texture_cache.DoFermiCopy(src, dst, copy_config); | ||
| 508 | return true; | ||
| 509 | } | ||
| 510 | |||
| 511 | bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | ||
| 512 | VAddr framebuffer_addr, u32 pixel_stride) { | ||
| 513 | if (!framebuffer_addr) { | ||
| 514 | return false; | ||
| 515 | } | ||
| 516 | |||
| 517 | const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)}; | ||
| 518 | const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)}; | ||
| 519 | if (!surface) { | ||
| 520 | return false; | ||
| 521 | } | ||
| 522 | |||
| 523 | // Verify that the cached surface is the same size and format as the requested framebuffer | ||
| 524 | const auto& params{surface->GetSurfaceParams()}; | ||
| 525 | const auto& pixel_format{ | ||
| 526 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; | ||
| 527 | ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); | ||
| 528 | ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); | ||
| 529 | // ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different"); | ||
| 530 | |||
| 531 | screen_info.image = &surface->GetImage(); | ||
| 532 | screen_info.width = params.width; | ||
| 533 | screen_info.height = params.height; | ||
| 534 | screen_info.is_srgb = surface->GetSurfaceParams().srgb_conversion; | ||
| 535 | return true; | ||
| 536 | } | ||
| 537 | |||
| 538 | void RasterizerVulkan::FlushWork() { | ||
| 539 | if ((++draw_counter & 7) != 7) { | ||
| 540 | return; | ||
| 541 | } | ||
| 542 | if (draw_counter < 4096) { | ||
| 543 | // Flush work to the worker thread every 8 draws | ||
| 544 | scheduler.DispatchWork(); | ||
| 545 | } else { | ||
| 546 | // Flush work to the GPU (and implicitly the worker thread) roughly every 4096 draws | ||
| 547 | scheduler.Flush(); | ||
| 548 | draw_counter = 0; | ||
| 549 | } | ||
| 550 | } | ||
| 551 | |||
| 552 | RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { | ||
| 553 | MICROPROFILE_SCOPE(Vulkan_RenderTargets); | ||
| 554 | auto& dirty = system.GPU().Maxwell3D().dirty; | ||
| 555 | const bool update_rendertargets = dirty.render_settings; | ||
| 556 | dirty.render_settings = false; | ||
| 557 | |||
| 558 | texture_cache.GuardRenderTargets(true); | ||
| 559 | |||
| 560 | Texceptions texceptions; | ||
| 561 | for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { | ||
| 562 | if (update_rendertargets) { | ||
| 563 | color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true); | ||
| 564 | } | ||
| 565 | if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { | ||
| 566 | texceptions.set(rt); | ||
| 567 | } | ||
| 568 | } | ||
| 569 | |||
| 570 | if (update_rendertargets) { | ||
| 571 | zeta_attachment = texture_cache.GetDepthBufferSurface(true); | ||
| 572 | } | ||
| 573 | if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { | ||
| 574 | texceptions.set(ZETA_TEXCEPTION_INDEX); | ||
| 575 | } | ||
| 576 | |||
| 577 | texture_cache.GuardRenderTargets(false); | ||
| 578 | |||
| 579 | return texceptions; | ||
| 580 | } | ||
| 581 | |||
| 582 | bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachment) { | ||
| 583 | bool overlap = false; | ||
| 584 | for (auto& [view, layout] : sampled_views) { | ||
| 585 | if (!attachment.IsSameSurface(*view)) { | ||
| 586 | continue; | ||
| 587 | } | ||
| 588 | overlap = true; | ||
| 589 | *layout = vk::ImageLayout::eGeneral; | ||
| 590 | } | ||
| 591 | return overlap; | ||
| 592 | } | ||
| 593 | |||
| 594 | std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffers( | ||
| 595 | vk::RenderPass renderpass) { | ||
| 596 | FramebufferCacheKey fbkey; | ||
| 597 | fbkey.renderpass = renderpass; | ||
| 598 | fbkey.width = std::numeric_limits<u32>::max(); | ||
| 599 | fbkey.height = std::numeric_limits<u32>::max(); | ||
| 600 | |||
| 601 | const auto MarkAsModifiedAndPush = [&](const View& view) { | ||
| 602 | if (view == nullptr) { | ||
| 603 | return false; | ||
| 604 | } | ||
| 605 | fbkey.views.push_back(view->GetHandle()); | ||
| 606 | fbkey.width = std::min(fbkey.width, view->GetWidth()); | ||
| 607 | fbkey.height = std::min(fbkey.height, view->GetHeight()); | ||
| 608 | return true; | ||
| 609 | }; | ||
| 610 | |||
| 611 | for (std::size_t index = 0; index < std::size(color_attachments); ++index) { | ||
| 612 | if (MarkAsModifiedAndPush(color_attachments[index])) { | ||
| 613 | texture_cache.MarkColorBufferInUse(index); | ||
| 614 | } | ||
| 615 | } | ||
| 616 | if (MarkAsModifiedAndPush(zeta_attachment)) { | ||
| 617 | texture_cache.MarkDepthBufferInUse(); | ||
| 618 | } | ||
| 619 | |||
| 620 | const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(fbkey); | ||
| 621 | auto& framebuffer = fbentry->second; | ||
| 622 | if (is_cache_miss) { | ||
| 623 | const vk::FramebufferCreateInfo framebuffer_ci( | ||
| 624 | {}, fbkey.renderpass, static_cast<u32>(fbkey.views.size()), fbkey.views.data(), | ||
| 625 | fbkey.width, fbkey.height, 1); | ||
| 626 | const auto dev = device.GetLogical(); | ||
| 627 | const auto& dld = device.GetDispatchLoader(); | ||
| 628 | framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld); | ||
| 629 | } | ||
| 630 | |||
| 631 | return {*framebuffer, vk::Extent2D{fbkey.width, fbkey.height}}; | ||
| 632 | } | ||
| 633 | |||
| 634 | RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, | ||
| 635 | BufferBindings& buffer_bindings, | ||
| 636 | bool is_indexed, | ||
| 637 | bool is_instanced) { | ||
| 638 | MICROPROFILE_SCOPE(Vulkan_Geometry); | ||
| 639 | |||
| 640 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 641 | const auto& regs = gpu.regs; | ||
| 642 | |||
| 643 | SetupVertexArrays(fixed_state.vertex_input, buffer_bindings); | ||
| 644 | |||
| 645 | const u32 base_instance = regs.vb_base_instance; | ||
| 646 | const u32 num_instances = is_instanced ? gpu.mme_draw.instance_count : 1; | ||
| 647 | const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first; | ||
| 648 | const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count; | ||
| 649 | |||
| 650 | DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed}; | ||
| 651 | SetupIndexBuffer(buffer_bindings, params, is_indexed); | ||
| 652 | |||
| 653 | return params; | ||
| 654 | } | ||
| 655 | |||
| 656 | void RasterizerVulkan::SetupShaderDescriptors( | ||
| 657 | const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) { | ||
| 658 | texture_cache.GuardSamplers(true); | ||
| 659 | |||
| 660 | for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 661 | // Skip VertexA stage | ||
| 662 | const auto& shader = shaders[stage + 1]; | ||
| 663 | if (!shader) { | ||
| 664 | continue; | ||
| 665 | } | ||
| 666 | const auto& entries = shader->GetEntries(); | ||
| 667 | SetupGraphicsConstBuffers(entries, stage); | ||
| 668 | SetupGraphicsGlobalBuffers(entries, stage); | ||
| 669 | SetupGraphicsTexelBuffers(entries, stage); | ||
| 670 | SetupGraphicsTextures(entries, stage); | ||
| 671 | SetupGraphicsImages(entries, stage); | ||
| 672 | } | ||
| 673 | texture_cache.GuardSamplers(false); | ||
| 674 | } | ||
| 675 | |||
| 676 | void RasterizerVulkan::SetupImageTransitions( | ||
| 677 | Texceptions texceptions, const std::array<View, Maxwell::NumRenderTargets>& color_attachments, | ||
| 678 | const View& zeta_attachment) { | ||
| 679 | TransitionImages(sampled_views, vk::PipelineStageFlagBits::eAllGraphics, | ||
| 680 | vk::AccessFlagBits::eShaderRead); | ||
| 681 | TransitionImages(image_views, vk::PipelineStageFlagBits::eAllGraphics, | ||
| 682 | vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite); | ||
| 683 | |||
| 684 | for (std::size_t rt = 0; rt < std::size(color_attachments); ++rt) { | ||
| 685 | const auto color_attachment = color_attachments[rt]; | ||
| 686 | if (color_attachment == nullptr) { | ||
| 687 | continue; | ||
| 688 | } | ||
| 689 | const auto image_layout = | ||
| 690 | texceptions[rt] ? vk::ImageLayout::eGeneral : vk::ImageLayout::eColorAttachmentOptimal; | ||
| 691 | color_attachment->Transition( | ||
| 692 | image_layout, vk::PipelineStageFlagBits::eColorAttachmentOutput, | ||
| 693 | vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite); | ||
| 694 | } | ||
| 695 | |||
| 696 | if (zeta_attachment != nullptr) { | ||
| 697 | const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX] | ||
| 698 | ? vk::ImageLayout::eGeneral | ||
| 699 | : vk::ImageLayout::eDepthStencilAttachmentOptimal; | ||
| 700 | zeta_attachment->Transition(image_layout, vk::PipelineStageFlagBits::eLateFragmentTests, | ||
| 701 | vk::AccessFlagBits::eDepthStencilAttachmentRead | | ||
| 702 | vk::AccessFlagBits::eDepthStencilAttachmentWrite); | ||
| 703 | } | ||
| 704 | } | ||
| 705 | |||
| 706 | void RasterizerVulkan::UpdateDynamicStates() { | ||
| 707 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 708 | UpdateViewportsState(gpu); | ||
| 709 | UpdateScissorsState(gpu); | ||
| 710 | UpdateDepthBias(gpu); | ||
| 711 | UpdateBlendConstants(gpu); | ||
| 712 | UpdateDepthBounds(gpu); | ||
| 713 | UpdateStencilFaces(gpu); | ||
| 714 | } | ||
| 715 | |||
| 716 | void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, | ||
| 717 | BufferBindings& buffer_bindings) { | ||
| 718 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 719 | |||
| 720 | for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexAttributes); ++index) { | ||
| 721 | const auto& attrib = regs.vertex_attrib_format[index]; | ||
| 722 | if (!attrib.IsValid()) { | ||
| 723 | continue; | ||
| 724 | } | ||
| 725 | |||
| 726 | const auto& buffer = regs.vertex_array[attrib.buffer]; | ||
| 727 | ASSERT(buffer.IsEnabled()); | ||
| 728 | |||
| 729 | vertex_input.attributes[vertex_input.num_attributes++] = | ||
| 730 | FixedPipelineState::VertexAttribute(index, attrib.buffer, attrib.type, attrib.size, | ||
| 731 | attrib.offset); | ||
| 732 | } | ||
| 733 | |||
| 734 | for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexArrays); ++index) { | ||
| 735 | const auto& vertex_array = regs.vertex_array[index]; | ||
| 736 | if (!vertex_array.IsEnabled()) { | ||
| 737 | continue; | ||
| 738 | } | ||
| 739 | |||
| 740 | const GPUVAddr start{vertex_array.StartAddress()}; | ||
| 741 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; | ||
| 742 | |||
| 743 | ASSERT(end > start); | ||
| 744 | const std::size_t size{end - start + 1}; | ||
| 745 | const auto [buffer, offset] = buffer_cache.UploadMemory(start, size); | ||
| 746 | |||
| 747 | vertex_input.bindings[vertex_input.num_bindings++] = FixedPipelineState::VertexBinding( | ||
| 748 | index, vertex_array.stride, | ||
| 749 | regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0); | ||
| 750 | buffer_bindings.AddVertexBinding(buffer, offset); | ||
| 751 | } | ||
| 752 | } | ||
| 753 | |||
| 754 | void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, | ||
| 755 | bool is_indexed) { | ||
| 756 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 757 | switch (regs.draw.topology) { | ||
| 758 | case Maxwell::PrimitiveTopology::Quads: | ||
| 759 | if (params.is_indexed) { | ||
| 760 | UNIMPLEMENTED(); | ||
| 761 | } else { | ||
| 762 | const auto [buffer, offset] = | ||
| 763 | quad_array_pass.Assemble(params.num_vertices, params.base_vertex); | ||
| 764 | buffer_bindings.SetIndexBinding(&buffer, offset, vk::IndexType::eUint32); | ||
| 765 | params.base_vertex = 0; | ||
| 766 | params.num_vertices = params.num_vertices * 6 / 4; | ||
| 767 | params.is_indexed = true; | ||
| 768 | } | ||
| 769 | break; | ||
| 770 | default: { | ||
| 771 | if (!is_indexed) { | ||
| 772 | break; | ||
| 773 | } | ||
| 774 | auto [buffer, offset] = | ||
| 775 | buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize()); | ||
| 776 | |||
| 777 | auto format = regs.index_array.format; | ||
| 778 | const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; | ||
| 779 | if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) { | ||
| 780 | std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, *buffer, offset); | ||
| 781 | format = Maxwell::IndexFormat::UnsignedShort; | ||
| 782 | } | ||
| 783 | |||
| 784 | buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format)); | ||
| 785 | break; | ||
| 786 | } | ||
| 787 | } | ||
| 788 | } | ||
| 789 | |||
| 790 | void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) { | ||
| 791 | MICROPROFILE_SCOPE(Vulkan_ConstBuffers); | ||
| 792 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 793 | const auto& shader_stage = gpu.state.shader_stages[stage]; | ||
| 794 | for (const auto& entry : entries.const_buffers) { | ||
| 795 | SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]); | ||
| 796 | } | ||
| 797 | } | ||
| 798 | |||
| 799 | void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) { | ||
| 800 | MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); | ||
| 801 | auto& gpu{system.GPU()}; | ||
| 802 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage]}; | ||
| 803 | |||
| 804 | for (const auto& entry : entries.global_buffers) { | ||
| 805 | const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset(); | ||
| 806 | SetupGlobalBuffer(entry, addr); | ||
| 807 | } | ||
| 808 | } | ||
| 809 | |||
| 810 | void RasterizerVulkan::SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage) { | ||
| 811 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 812 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 813 | for (const auto& entry : entries.texel_buffers) { | ||
| 814 | const auto image = GetTextureInfo(gpu, entry, stage).tic; | ||
| 815 | SetupTexelBuffer(image, entry); | ||
| 816 | } | ||
| 817 | } | ||
| 818 | |||
| 819 | void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) { | ||
| 820 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 821 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 822 | for (const auto& entry : entries.samplers) { | ||
| 823 | const auto texture = GetTextureInfo(gpu, entry, stage); | ||
| 824 | SetupTexture(texture, entry); | ||
| 825 | } | ||
| 826 | } | ||
| 827 | |||
| 828 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { | ||
| 829 | MICROPROFILE_SCOPE(Vulkan_Images); | ||
| 830 | const auto& gpu = system.GPU().KeplerCompute(); | ||
| 831 | for (const auto& entry : entries.images) { | ||
| 832 | const auto tic = GetTextureInfo(gpu, entry, stage).tic; | ||
| 833 | SetupImage(tic, entry); | ||
| 834 | } | ||
| 835 | } | ||
| 836 | |||
| 837 | void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) { | ||
| 838 | MICROPROFILE_SCOPE(Vulkan_ConstBuffers); | ||
| 839 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||
| 840 | for (const auto& entry : entries.const_buffers) { | ||
| 841 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | ||
| 842 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); | ||
| 843 | Tegra::Engines::ConstBufferInfo buffer; | ||
| 844 | buffer.address = config.Address(); | ||
| 845 | buffer.size = config.size; | ||
| 846 | buffer.enabled = mask[entry.GetIndex()]; | ||
| 847 | SetupConstBuffer(entry, buffer); | ||
| 848 | } | ||
| 849 | } | ||
| 850 | |||
| 851 | void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { | ||
| 852 | MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); | ||
| 853 | const auto cbufs{system.GPU().KeplerCompute().launch_description.const_buffer_config}; | ||
| 854 | for (const auto& entry : entries.global_buffers) { | ||
| 855 | const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; | ||
| 856 | SetupGlobalBuffer(entry, addr); | ||
| 857 | } | ||
| 858 | } | ||
| 859 | |||
| 860 | void RasterizerVulkan::SetupComputeTexelBuffers(const ShaderEntries& entries) { | ||
| 861 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 862 | const auto& gpu = system.GPU().KeplerCompute(); | ||
| 863 | for (const auto& entry : entries.texel_buffers) { | ||
| 864 | const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; | ||
| 865 | SetupTexelBuffer(image, entry); | ||
| 866 | } | ||
| 867 | } | ||
| 868 | |||
| 869 | void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { | ||
| 870 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 871 | const auto& gpu = system.GPU().KeplerCompute(); | ||
| 872 | for (const auto& entry : entries.samplers) { | ||
| 873 | const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex); | ||
| 874 | SetupTexture(texture, entry); | ||
| 875 | } | ||
| 876 | } | ||
| 877 | |||
| 878 | void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { | ||
| 879 | MICROPROFILE_SCOPE(Vulkan_Images); | ||
| 880 | const auto& gpu = system.GPU().KeplerCompute(); | ||
| 881 | for (const auto& entry : entries.images) { | ||
| 882 | const auto tic = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; | ||
| 883 | SetupImage(tic, entry); | ||
| 884 | } | ||
| 885 | } | ||
| 886 | |||
| 887 | void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, | ||
| 888 | const Tegra::Engines::ConstBufferInfo& buffer) { | ||
| 889 | // Align the size to avoid bad std140 interactions | ||
| 890 | const std::size_t size = | ||
| 891 | Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); | ||
| 892 | ASSERT(size <= MaxConstbufferSize); | ||
| 893 | |||
| 894 | const auto [buffer_handle, offset] = | ||
| 895 | buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); | ||
| 896 | |||
| 897 | update_descriptor_queue.AddBuffer(buffer_handle, offset, size); | ||
| 898 | } | ||
| 899 | |||
| 900 | void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { | ||
| 901 | auto& memory_manager{system.GPU().MemoryManager()}; | ||
| 902 | const auto actual_addr = memory_manager.Read<u64>(address); | ||
| 903 | const auto size = memory_manager.Read<u32>(address + 8); | ||
| 904 | |||
| 905 | if (size == 0) { | ||
| 906 | // Sometimes global memory pointers don't have a proper size. Upload a dummy entry because | ||
| 907 | // Vulkan doesn't like empty buffers. | ||
| 908 | constexpr std::size_t dummy_size = 4; | ||
| 909 | const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size); | ||
| 910 | update_descriptor_queue.AddBuffer(buffer, 0, dummy_size); | ||
| 911 | return; | ||
| 912 | } | ||
| 913 | |||
| 914 | const auto [buffer, offset] = buffer_cache.UploadMemory( | ||
| 915 | actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); | ||
| 916 | update_descriptor_queue.AddBuffer(buffer, offset, size); | ||
| 917 | } | ||
| 918 | |||
| 919 | void RasterizerVulkan::SetupTexelBuffer(const Tegra::Texture::TICEntry& tic, | ||
| 920 | const TexelBufferEntry& entry) { | ||
| 921 | auto view = texture_cache.GetTextureSurface(tic, entry); | ||
| 922 | ASSERT(view->IsBufferView()); | ||
| 923 | |||
| 924 | update_descriptor_queue.AddTexelBuffer(view->GetBufferView()); | ||
| 925 | } | ||
| 926 | |||
| 927 | void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& texture, | ||
| 928 | const SamplerEntry& entry) { | ||
| 929 | auto view = texture_cache.GetTextureSurface(texture.tic, entry); | ||
| 930 | ASSERT(!view->IsBufferView()); | ||
| 931 | |||
| 932 | const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source, | ||
| 933 | texture.tic.z_source, texture.tic.w_source); | ||
| 934 | const auto sampler = sampler_cache.GetSampler(texture.tsc); | ||
| 935 | update_descriptor_queue.AddSampledImage(sampler, image_view); | ||
| 936 | |||
| 937 | const auto image_layout = update_descriptor_queue.GetLastImageLayout(); | ||
| 938 | *image_layout = vk::ImageLayout::eShaderReadOnlyOptimal; | ||
| 939 | sampled_views.push_back(ImageView{std::move(view), image_layout}); | ||
| 940 | } | ||
| 941 | |||
| 942 | void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) { | ||
| 943 | auto view = texture_cache.GetImageSurface(tic, entry); | ||
| 944 | |||
| 945 | if (entry.IsWritten()) { | ||
| 946 | view->MarkAsModified(texture_cache.Tick()); | ||
| 947 | } | ||
| 948 | |||
| 949 | UNIMPLEMENTED_IF(tic.IsBuffer()); | ||
| 950 | |||
| 951 | const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source); | ||
| 952 | update_descriptor_queue.AddImage(image_view); | ||
| 953 | |||
| 954 | const auto image_layout = update_descriptor_queue.GetLastImageLayout(); | ||
| 955 | *image_layout = vk::ImageLayout::eGeneral; | ||
| 956 | image_views.push_back(ImageView{std::move(view), image_layout}); | ||
| 957 | } | ||
| 958 | |||
| 959 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu) { | ||
| 960 | if (!gpu.dirty.viewport_transform && scheduler.TouchViewports()) { | ||
| 961 | return; | ||
| 962 | } | ||
| 963 | gpu.dirty.viewport_transform = false; | ||
| 964 | const auto& regs = gpu.regs; | ||
| 965 | const std::array viewports{ | ||
| 966 | GetViewportState(device, regs, 0), GetViewportState(device, regs, 1), | ||
| 967 | GetViewportState(device, regs, 2), GetViewportState(device, regs, 3), | ||
| 968 | GetViewportState(device, regs, 4), GetViewportState(device, regs, 5), | ||
| 969 | GetViewportState(device, regs, 6), GetViewportState(device, regs, 7), | ||
| 970 | GetViewportState(device, regs, 8), GetViewportState(device, regs, 9), | ||
| 971 | GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), | ||
| 972 | GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), | ||
| 973 | GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)}; | ||
| 974 | scheduler.Record([viewports](auto cmdbuf, auto& dld) { | ||
| 975 | cmdbuf.setViewport(0, static_cast<u32>(viewports.size()), viewports.data(), dld); | ||
| 976 | }); | ||
| 977 | } | ||
| 978 | |||
| 979 | void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu) { | ||
| 980 | if (!gpu.dirty.scissor_test && scheduler.TouchScissors()) { | ||
| 981 | return; | ||
| 982 | } | ||
| 983 | gpu.dirty.scissor_test = false; | ||
| 984 | const auto& regs = gpu.regs; | ||
| 985 | const std::array scissors = { | ||
| 986 | GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), | ||
| 987 | GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), | ||
| 988 | GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8), | ||
| 989 | GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), | ||
| 990 | GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), | ||
| 991 | GetScissorState(regs, 15)}; | ||
| 992 | scheduler.Record([scissors](auto cmdbuf, auto& dld) { | ||
| 993 | cmdbuf.setScissor(0, static_cast<u32>(scissors.size()), scissors.data(), dld); | ||
| 994 | }); | ||
| 995 | } | ||
| 996 | |||
| 997 | void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu) { | ||
| 998 | if (!gpu.dirty.polygon_offset && scheduler.TouchDepthBias()) { | ||
| 999 | return; | ||
| 1000 | } | ||
| 1001 | gpu.dirty.polygon_offset = false; | ||
| 1002 | const auto& regs = gpu.regs; | ||
| 1003 | scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp, | ||
| 1004 | factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) { | ||
| 1005 | cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld); | ||
| 1006 | }); | ||
| 1007 | } | ||
| 1008 | |||
| 1009 | void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu) { | ||
| 1010 | if (!gpu.dirty.blend_state && scheduler.TouchBlendConstants()) { | ||
| 1011 | return; | ||
| 1012 | } | ||
| 1013 | gpu.dirty.blend_state = false; | ||
| 1014 | const std::array blend_color = {gpu.regs.blend_color.r, gpu.regs.blend_color.g, | ||
| 1015 | gpu.regs.blend_color.b, gpu.regs.blend_color.a}; | ||
| 1016 | scheduler.Record([blend_color](auto cmdbuf, auto& dld) { | ||
| 1017 | cmdbuf.setBlendConstants(blend_color.data(), dld); | ||
| 1018 | }); | ||
| 1019 | } | ||
| 1020 | |||
| 1021 | void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu) { | ||
| 1022 | if (!gpu.dirty.depth_bounds_values && scheduler.TouchDepthBounds()) { | ||
| 1023 | return; | ||
| 1024 | } | ||
| 1025 | gpu.dirty.depth_bounds_values = false; | ||
| 1026 | const auto& regs = gpu.regs; | ||
| 1027 | scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]]( | ||
| 1028 | auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); }); | ||
| 1029 | } | ||
| 1030 | |||
| 1031 | void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu) { | ||
| 1032 | if (!gpu.dirty.stencil_test && scheduler.TouchStencilValues()) { | ||
| 1033 | return; | ||
| 1034 | } | ||
| 1035 | gpu.dirty.stencil_test = false; | ||
| 1036 | const auto& regs = gpu.regs; | ||
| 1037 | if (regs.stencil_two_side_enable) { | ||
| 1038 | // Separate values per face | ||
| 1039 | scheduler.Record( | ||
| 1040 | [front_ref = regs.stencil_front_func_ref, front_write_mask = regs.stencil_front_mask, | ||
| 1041 | front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_func_ref, | ||
| 1042 | back_write_mask = regs.stencil_back_mask, | ||
| 1043 | back_test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) { | ||
| 1044 | // Front face | ||
| 1045 | cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFront, front_ref, dld); | ||
| 1046 | cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFront, front_write_mask, dld); | ||
| 1047 | cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFront, front_test_mask, dld); | ||
| 1048 | |||
| 1049 | // Back face | ||
| 1050 | cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eBack, back_ref, dld); | ||
| 1051 | cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eBack, back_write_mask, dld); | ||
| 1052 | cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eBack, back_test_mask, dld); | ||
| 1053 | }); | ||
| 1054 | } else { | ||
| 1055 | // Front face defines both faces | ||
| 1056 | scheduler.Record([ref = regs.stencil_back_func_ref, write_mask = regs.stencil_back_mask, | ||
| 1057 | test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) { | ||
| 1058 | cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, ref, dld); | ||
| 1059 | cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, write_mask, dld); | ||
| 1060 | cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, test_mask, dld); | ||
| 1061 | }); | ||
| 1062 | } | ||
| 1063 | } | ||
| 1064 | |||
| 1065 | std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { | ||
| 1066 | std::size_t size = CalculateVertexArraysSize(); | ||
| 1067 | if (is_indexed) { | ||
| 1068 | size = Common::AlignUp(size, 4) + CalculateIndexBufferSize(); | ||
| 1069 | } | ||
| 1070 | size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); | ||
| 1071 | return size; | ||
| 1072 | } | ||
| 1073 | |||
| 1074 | std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { | ||
| 1075 | return Tegra::Engines::KeplerCompute::NumConstBuffers * | ||
| 1076 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | ||
| 1077 | } | ||
| 1078 | |||
| 1079 | std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { | ||
| 1080 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 1081 | |||
| 1082 | std::size_t size = 0; | ||
| 1083 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 1084 | // This implementation assumes that all attributes are used in the shader. | ||
| 1085 | const GPUVAddr start{regs.vertex_array[index].StartAddress()}; | ||
| 1086 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; | ||
| 1087 | DEBUG_ASSERT(end > start); | ||
| 1088 | |||
| 1089 | size += (end - start + 1) * regs.vertex_array[index].enable; | ||
| 1090 | } | ||
| 1091 | return size; | ||
| 1092 | } | ||
| 1093 | |||
| 1094 | std::size_t RasterizerVulkan::CalculateIndexBufferSize() const { | ||
| 1095 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 1096 | return static_cast<std::size_t>(regs.index_array.count) * | ||
| 1097 | static_cast<std::size_t>(regs.index_array.FormatSizeInBytes()); | ||
| 1098 | } | ||
| 1099 | |||
| 1100 | std::size_t RasterizerVulkan::CalculateConstBufferSize( | ||
| 1101 | const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const { | ||
| 1102 | if (entry.IsIndirect()) { | ||
| 1103 | // Buffer is accessed indirectly, so upload the entire thing | ||
| 1104 | return buffer.size; | ||
| 1105 | } else { | ||
| 1106 | // Buffer is accessed directly, upload just what we use | ||
| 1107 | return entry.GetSize(); | ||
| 1108 | } | ||
| 1109 | } | ||
| 1110 | |||
| 1111 | RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const { | ||
| 1112 | using namespace VideoCore::Surface; | ||
| 1113 | |||
| 1114 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 1115 | RenderPassParams renderpass_params; | ||
| 1116 | |||
| 1117 | for (std::size_t rt = 0; rt < static_cast<std::size_t>(regs.rt_control.count); ++rt) { | ||
| 1118 | const auto& rendertarget = regs.rt[rt]; | ||
| 1119 | if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) | ||
| 1120 | continue; | ||
| 1121 | renderpass_params.color_attachments.push_back(RenderPassParams::ColorAttachment{ | ||
| 1122 | static_cast<u32>(rt), PixelFormatFromRenderTargetFormat(rendertarget.format), | ||
| 1123 | texceptions.test(rt)}); | ||
| 1124 | } | ||
| 1125 | |||
| 1126 | renderpass_params.has_zeta = regs.zeta_enable; | ||
| 1127 | if (renderpass_params.has_zeta) { | ||
| 1128 | renderpass_params.zeta_pixel_format = PixelFormatFromDepthFormat(regs.zeta.format); | ||
| 1129 | renderpass_params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX]; | ||
| 1130 | } | ||
| 1131 | |||
| 1132 | return renderpass_params; | ||
| 1133 | } | ||
| 1134 | |||
| 1135 | } // namespace Vulkan | ||
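The FlushWork comments in the diff above describe the batching thresholds only loosely. The following standalone sketch, which reuses the same counter logic as RasterizerVulkan::FlushWork but is otherwise invented for illustration (it is not yuzu code), makes the two thresholds explicit: work is handed to the worker thread on every 8th draw, and the command buffer is flushed to the GPU once the counter passes 4096.

```cpp
// Sketch of the FlushWork() batching heuristic above. The prints stand in for
// VKScheduler::DispatchWork() and VKScheduler::Flush().
#include <cstdio>

int main() {
    unsigned draw_counter = 0;
    for (int draw = 1; draw <= 10000; ++draw) {
        if ((++draw_counter & 7) != 7) {
            continue; // act only on every 8th draw
        }
        if (draw_counter < 4096) {
            // DispatchWork(): hand the recorded commands to the worker thread
        } else {
            // Flush(): submit to the GPU (which implicitly dispatches to the worker too)
            std::printf("GPU flush at draw %d (counter=%u)\n", draw, draw_counter);
            draw_counter = 0;
        }
    }
    return 0;
}
```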
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index fc324952b..2ecc19e7a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -4,10 +4,259 @@
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 8 | #include <bitset> | ||
| 9 | #include <memory> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include <boost/container/static_vector.hpp> | ||
| 14 | |||
| 15 | #include "common/common_types.h" | ||
| 16 | #include "video_core/memory_manager.h" | ||
| 17 | #include "video_core/rasterizer_accelerated.h" | ||
| 7 | #include "video_core/rasterizer_interface.h" | 18 | #include "video_core/rasterizer_interface.h" |
| 19 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 20 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | ||
| 21 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 22 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||
| 23 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||
| 24 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | ||
| 25 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||
| 26 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 27 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||
| 28 | #include "video_core/renderer_vulkan/vk_sampler_cache.h" | ||
| 29 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 30 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||
| 31 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 32 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 33 | |||
| 34 | namespace Core { | ||
| 35 | class System; | ||
| 36 | } | ||
| 37 | |||
| 38 | namespace Core::Frontend { | ||
| 39 | class EmuWindow; | ||
| 40 | } | ||
| 41 | |||
| 42 | namespace Tegra::Engines { | ||
| 43 | class Maxwell3D; | ||
| 44 | } | ||
| 45 | |||
| 46 | namespace Vulkan { | ||
| 47 | |||
| 48 | struct VKScreenInfo; | ||
| 49 | |||
| 50 | using ImageViewsPack = | ||
| 51 | boost::container::static_vector<vk::ImageView, Maxwell::NumRenderTargets + 1>; | ||
| 52 | |||
| 53 | struct FramebufferCacheKey { | ||
| 54 | vk::RenderPass renderpass; | ||
| 55 | ImageViewsPack views; | ||
| 56 | u32 width; | ||
| 57 | u32 height; | ||
| 58 | |||
| 59 | std::size_t Hash() const noexcept { | ||
| 60 | std::size_t hash = 0; | ||
| 61 | boost::hash_combine(hash, static_cast<VkRenderPass>(renderpass)); | ||
| 62 | for (const auto& view : views) { | ||
| 63 | boost::hash_combine(hash, static_cast<VkImageView>(view)); | ||
| 64 | } | ||
| 65 | boost::hash_combine(hash, width); | ||
| 66 | boost::hash_combine(hash, height); | ||
| 67 | return hash; | ||
| 68 | } | ||
| 69 | |||
| 70 | bool operator==(const FramebufferCacheKey& rhs) const noexcept { | ||
| 71 | return std::tie(renderpass, views, width, height) == | ||
| 72 | std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height); | ||
| 73 | } | ||
| 74 | }; | ||
| 75 | |||
| 76 | } // namespace Vulkan | ||
| 77 | |||
| 78 | namespace std { | ||
| 79 | |||
| 80 | template <> | ||
| 81 | struct hash<Vulkan::FramebufferCacheKey> { | ||
| 82 | std::size_t operator()(const Vulkan::FramebufferCacheKey& k) const noexcept { | ||
| 83 | return k.Hash(); | ||
| 84 | } | ||
| 85 | }; | ||
| 86 | |||
| 87 | } // namespace std | ||
| 8 | 88 | ||
| 9 | namespace Vulkan { | 89 | namespace Vulkan { |
| 10 | 90 | ||
| 11 | class RasterizerVulkan : public VideoCore::RasterizerInterface {}; | 91 | class BufferBindings; |
| 92 | |||
| 93 | struct ImageView { | ||
| 94 | View view; | ||
| 95 | vk::ImageLayout* layout = nullptr; | ||
| 96 | }; | ||
| 97 | |||
| 98 | class RasterizerVulkan : public VideoCore::RasterizerAccelerated { | ||
| 99 | public: | ||
| 100 | explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, | ||
| 101 | VKScreenInfo& screen_info, const VKDevice& device, | ||
| 102 | VKResourceManager& resource_manager, VKMemoryManager& memory_manager, | ||
| 103 | VKScheduler& scheduler); | ||
| 104 | ~RasterizerVulkan() override; | ||
| 105 | |||
| 106 | bool DrawBatch(bool is_indexed) override; | ||
| 107 | bool DrawMultiBatch(bool is_indexed) override; | ||
| 108 | void Clear() override; | ||
| 109 | void DispatchCompute(GPUVAddr code_addr) override; | ||
| 110 | void FlushAll() override; | ||
| 111 | void FlushRegion(CacheAddr addr, u64 size) override; | ||
| 112 | void InvalidateRegion(CacheAddr addr, u64 size) override; | ||
| 113 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | ||
| 114 | void FlushCommands() override; | ||
| 115 | void TickFrame() override; | ||
| 116 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | ||
| 117 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | ||
| 118 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | ||
| 119 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | ||
| 120 | u32 pixel_stride) override; | ||
| 121 | |||
| 122 | /// Maximum supported size that a constbuffer can have in bytes. | ||
| 123 | static constexpr std::size_t MaxConstbufferSize = 0x10000; | ||
| 124 | static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, | ||
| 125 | "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); | ||
| 126 | |||
| 127 | private: | ||
| 128 | struct DrawParameters { | ||
| 129 | void Draw(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld) const; | ||
| 130 | |||
| 131 | u32 base_instance = 0; | ||
| 132 | u32 num_instances = 0; | ||
| 133 | u32 base_vertex = 0; | ||
| 134 | u32 num_vertices = 0; | ||
| 135 | bool is_indexed = false; | ||
| 136 | }; | ||
| 137 | |||
| 138 | using Texceptions = std::bitset<Maxwell::NumRenderTargets + 1>; | ||
| 139 | |||
| 140 | static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8; | ||
| 141 | |||
| 142 | void Draw(bool is_indexed, bool is_instanced); | ||
| 143 | |||
| 144 | void FlushWork(); | ||
| 145 | |||
| 146 | Texceptions UpdateAttachments(); | ||
| 147 | |||
| 148 | std::tuple<vk::Framebuffer, vk::Extent2D> ConfigureFramebuffers(vk::RenderPass renderpass); | ||
| 149 | |||
| 150 | /// Sets up geometry buffers and state. | ||
| 151 | DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, | ||
| 152 | bool is_indexed, bool is_instanced); | ||
| 153 | |||
| 154 | /// Sets up descriptors in the graphics pipeline. | ||
| 155 | void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders); | ||
| 156 | |||
| 157 | void SetupImageTransitions(Texceptions texceptions, | ||
| 158 | const std::array<View, Maxwell::NumRenderTargets>& color_attachments, | ||
| 159 | const View& zeta_attachment); | ||
| 160 | |||
| 161 | void UpdateDynamicStates(); | ||
| 162 | |||
| 163 | bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); | ||
| 164 | |||
| 165 | void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, | ||
| 166 | BufferBindings& buffer_bindings); | ||
| 167 | |||
| 168 | void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); | ||
| 169 | |||
| 170 | /// Sets up constant buffers in the graphics pipeline. | ||
| 171 | void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage); | ||
| 172 | |||
| 173 | /// Sets up global buffers in the graphics pipeline. | ||
| 174 | void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage); | ||
| 175 | |||
| 176 | /// Sets up texel buffers in the graphics pipeline. | ||
| 177 | void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage); | ||
| 178 | |||
| 179 | /// Sets up textures in the graphics pipeline. | ||
| 180 | void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); | ||
| 181 | |||
| 182 | /// Sets up images in the graphics pipeline. | ||
| 183 | void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); | ||
| 184 | |||
| 185 | /// Sets up constant buffers in the compute pipeline. | ||
| 186 | void SetupComputeConstBuffers(const ShaderEntries& entries); | ||
| 187 | |||
| 188 | /// Sets up global buffers in the compute pipeline. | ||
| 189 | void SetupComputeGlobalBuffers(const ShaderEntries& entries); | ||
| 190 | |||
| 191 | /// Sets up texel buffers in the compute pipeline. | ||
| 192 | void SetupComputeTexelBuffers(const ShaderEntries& entries); | ||
| 193 | |||
| 194 | /// Sets up textures in the compute pipeline. | ||
| 195 | void SetupComputeTextures(const ShaderEntries& entries); | ||
| 196 | |||
| 197 | /// Sets up images in the compute pipeline. | ||
| 198 | void SetupComputeImages(const ShaderEntries& entries); | ||
| 199 | |||
| 200 | void SetupConstBuffer(const ConstBufferEntry& entry, | ||
| 201 | const Tegra::Engines::ConstBufferInfo& buffer); | ||
| 202 | |||
| 203 | void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); | ||
| 204 | |||
| 205 | void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry); | ||
| 206 | |||
| 207 | void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry); | ||
| 208 | |||
| 209 | void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); | ||
| 210 | |||
| 211 | void UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu); | ||
| 212 | void UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu); | ||
| 213 | void UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu); | ||
| 214 | void UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu); | ||
| 215 | void UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu); | ||
| 216 | void UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu); | ||
| 217 | |||
| 218 | std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; | ||
| 219 | |||
| 220 | std::size_t CalculateComputeStreamBufferSize() const; | ||
| 221 | |||
| 222 | std::size_t CalculateVertexArraysSize() const; | ||
| 223 | |||
| 224 | std::size_t CalculateIndexBufferSize() const; | ||
| 225 | |||
| 226 | std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry, | ||
| 227 | const Tegra::Engines::ConstBufferInfo& buffer) const; | ||
| 228 | |||
| 229 | RenderPassParams GetRenderPassParams(Texceptions texceptions) const; | ||
| 230 | |||
| 231 | Core::System& system; | ||
| 232 | Core::Frontend::EmuWindow& render_window; | ||
| 233 | VKScreenInfo& screen_info; | ||
| 234 | const VKDevice& device; | ||
| 235 | VKResourceManager& resource_manager; | ||
| 236 | VKMemoryManager& memory_manager; | ||
| 237 | VKScheduler& scheduler; | ||
| 238 | |||
| 239 | VKStagingBufferPool staging_pool; | ||
| 240 | VKDescriptorPool descriptor_pool; | ||
| 241 | VKUpdateDescriptorQueue update_descriptor_queue; | ||
| 242 | QuadArrayPass quad_array_pass; | ||
| 243 | Uint8Pass uint8_pass; | ||
| 244 | |||
| 245 | VKTextureCache texture_cache; | ||
| 246 | VKPipelineCache pipeline_cache; | ||
| 247 | VKBufferCache buffer_cache; | ||
| 248 | VKSamplerCache sampler_cache; | ||
| 249 | |||
| 250 | std::array<View, Maxwell::NumRenderTargets> color_attachments; | ||
| 251 | View zeta_attachment; | ||
| 252 | |||
| 253 | std::vector<ImageView> sampled_views; | ||
| 254 | std::vector<ImageView> image_views; | ||
| 255 | |||
| 256 | u32 draw_counter = 0; | ||
| 257 | |||
| 258 | // TODO(Rodrigo): Invalidate on image destruction | ||
| 259 | std::unordered_map<FramebufferCacheKey, UniqueFramebuffer> framebuffer_cache; | ||
| 260 | }; | ||
| 12 | 261 | ||
| 13 | } // namespace Vulkan | 262 | } // namespace Vulkan |
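FramebufferCacheKey in the header pairs a member Hash() with a std::hash specialization so the key can be used directly in the framebuffer_cache unordered_map. The sketch below reproduces that pattern in a self-contained form; FramebufferKey, HashCombine, and the raw integer handles are hypothetical simplifications standing in for FramebufferCacheKey, boost::hash_combine, and the vk:: handle types.

    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <tuple>
    #include <unordered_map>
    #include <vector>

    // Seed-combining helper modeled on Boost's formula (stand-in for boost::hash_combine).
    inline void HashCombine(std::size_t& seed, std::size_t value) {
        seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    }

    // Simplified analogue of FramebufferCacheKey: plain handles instead of vk:: types.
    struct FramebufferKey {
        std::uint64_t renderpass = 0;
        std::vector<std::uint64_t> views;
        std::uint32_t width = 0;
        std::uint32_t height = 0;

        std::size_t Hash() const noexcept {
            std::size_t hash = 0;
            HashCombine(hash, static_cast<std::size_t>(renderpass));
            for (const auto view : views) {
                HashCombine(hash, static_cast<std::size_t>(view));
            }
            HashCombine(hash, width);
            HashCombine(hash, height);
            return hash;
        }

        bool operator==(const FramebufferKey& rhs) const noexcept {
            return std::tie(renderpass, views, width, height) ==
                   std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height);
        }
    };

    // The std::hash specialization is what allows the key to be used as an
    // unordered_map key, mirroring framebuffer_cache in the rasterizer.
    namespace std {
    template <>
    struct hash<FramebufferKey> {
        std::size_t operator()(const FramebufferKey& k) const noexcept {
            return k.Hash();
        }
    };
    } // namespace std

    int main() {
        std::unordered_map<FramebufferKey, int> cache;
        const FramebufferKey key{1, {2, 3}, 1280, 720};
        cache.emplace(key, 42);               // first use creates the cached entry
        return cache.count(key) == 1 ? 0 : 1; // identical keys reuse the same framebuffer
    }

Equality stays exact over the render pass, attachment views, and dimensions while the hash merely combines the same fields, so hash collisions are tolerated but a framebuffer is only reused for a genuinely identical configuration.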