Diffstat (limited to '')
-rw-r--r--   src/video_core/CMakeLists.txt                         2
-rw-r--r--   src/video_core/renderer_vulkan/renderer_vulkan.h     72
-rw-r--r--   src/video_core/renderer_vulkan/vk_rasterizer.cpp   1141
-rw-r--r--   src/video_core/renderer_vulkan/vk_rasterizer.h      252
4 files changed, 1466 insertions(+), 1 deletion(-)
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 729ee4a01..12c46e86f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -153,6 +153,7 @@ if (ENABLE_VULKAN)
| 153 | renderer_vulkan/fixed_pipeline_state.h | 153 | renderer_vulkan/fixed_pipeline_state.h |
| 154 | renderer_vulkan/maxwell_to_vk.cpp | 154 | renderer_vulkan/maxwell_to_vk.cpp |
| 155 | renderer_vulkan/maxwell_to_vk.h | 155 | renderer_vulkan/maxwell_to_vk.h |
| 156 | renderer_vulkan/renderer_vulkan.h | ||
| 156 | renderer_vulkan/vk_buffer_cache.cpp | 157 | renderer_vulkan/vk_buffer_cache.cpp |
| 157 | renderer_vulkan/vk_buffer_cache.h | 158 | renderer_vulkan/vk_buffer_cache.h |
| 158 | renderer_vulkan/vk_compute_pass.cpp | 159 | renderer_vulkan/vk_compute_pass.cpp |
@@ -171,6 +172,7 @@ if (ENABLE_VULKAN)
| 171 | renderer_vulkan/vk_memory_manager.h | 172 | renderer_vulkan/vk_memory_manager.h |
| 172 | renderer_vulkan/vk_pipeline_cache.cpp | 173 | renderer_vulkan/vk_pipeline_cache.cpp |
| 173 | renderer_vulkan/vk_pipeline_cache.h | 174 | renderer_vulkan/vk_pipeline_cache.h |
| 175 | renderer_vulkan/vk_rasterizer.cpp | ||
| 174 | renderer_vulkan/vk_rasterizer.h | 176 | renderer_vulkan/vk_rasterizer.h |
| 175 | renderer_vulkan/vk_renderpass_cache.cpp | 177 | renderer_vulkan/vk_renderpass_cache.cpp |
| 176 | renderer_vulkan/vk_renderpass_cache.h | 178 | renderer_vulkan/vk_renderpass_cache.h |
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
new file
index 000000000..a472c5dc9
--- /dev/null
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -0,0 +1,72 @@
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <optional> | ||
| 8 | #include <vector> | ||
| 9 | #include "video_core/renderer_base.h" | ||
| 10 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 11 | |||
| 12 | namespace Core { | ||
| 13 | class System; | ||
| 14 | } | ||
| 15 | |||
| 16 | namespace Vulkan { | ||
| 17 | |||
| 18 | class VKBlitScreen; | ||
| 19 | class VKDevice; | ||
| 20 | class VKFence; | ||
| 21 | class VKMemoryManager; | ||
| 22 | class VKResourceManager; | ||
| 23 | class VKSwapchain; | ||
| 24 | class VKScheduler; | ||
| 25 | class VKImage; | ||
| 26 | |||
| 27 | struct VKScreenInfo { | ||
| 28 | VKImage* image{}; | ||
| 29 | u32 width{}; | ||
| 30 | u32 height{}; | ||
| 31 | bool is_srgb{}; | ||
| 32 | }; | ||
| 33 | |||
| 34 | class RendererVulkan final : public VideoCore::RendererBase { | ||
| 35 | public: | ||
| 36 | explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system); | ||
| 37 | ~RendererVulkan() override; | ||
| 38 | |||
| 39 | /// Swap buffers (render frame) | ||
| 40 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; | ||
| 41 | |||
| 42 | /// Initialize the renderer | ||
| 43 | bool Init() override; | ||
| 44 | |||
| 45 | /// Shutdown the renderer | ||
| 46 | void ShutDown() override; | ||
| 47 | |||
| 48 | private: | ||
| 49 | std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback( | ||
| 50 | const vk::DispatchLoaderDynamic& dldi); | ||
| 51 | |||
| 52 | bool PickDevices(const vk::DispatchLoaderDynamic& dldi); | ||
| 53 | |||
| 54 | void Report() const; | ||
| 55 | |||
| 56 | Core::System& system; | ||
| 57 | |||
| 58 | vk::Instance instance; | ||
| 59 | vk::SurfaceKHR surface; | ||
| 60 | |||
| 61 | VKScreenInfo screen_info; | ||
| 62 | |||
| 63 | UniqueDebugUtilsMessengerEXT debug_callback; | ||
| 64 | std::unique_ptr<VKDevice> device; | ||
| 65 | std::unique_ptr<VKSwapchain> swapchain; | ||
| 66 | std::unique_ptr<VKMemoryManager> memory_manager; | ||
| 67 | std::unique_ptr<VKResourceManager> resource_manager; | ||
| 68 | std::unique_ptr<VKScheduler> scheduler; | ||
| 69 | std::unique_ptr<VKBlitScreen> blit_screen; | ||
| 70 | }; | ||
| 71 | |||
| 72 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
new file
index 000000000..d2c6b1189
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -0,0 +1,1141 @@
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <memory> | ||
| 8 | #include <mutex> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include <boost/container/static_vector.hpp> | ||
| 12 | #include <boost/functional/hash.hpp> | ||
| 13 | |||
| 14 | #include "common/alignment.h" | ||
| 15 | #include "common/assert.h" | ||
| 16 | #include "common/logging/log.h" | ||
| 17 | #include "common/microprofile.h" | ||
| 18 | #include "core/core.h" | ||
| 19 | #include "core/memory.h" | ||
| 20 | #include "video_core/engines/kepler_compute.h" | ||
| 21 | #include "video_core/engines/maxwell_3d.h" | ||
| 22 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 23 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | ||
| 24 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 25 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | ||
| 26 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 27 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||
| 28 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | ||
| 29 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||
| 30 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 31 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||
| 32 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||
| 33 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | ||
| 34 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 35 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||
| 36 | #include "video_core/renderer_vulkan/vk_sampler_cache.h" | ||
| 37 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 38 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||
| 39 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 40 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 41 | |||
| 42 | namespace Vulkan { | ||
| 43 | |||
| 44 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 45 | |||
| 46 | MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192)); | ||
| 47 | MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); | ||
| 48 | MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128)); | ||
| 49 | MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128)); | ||
| 50 | MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128)); | ||
| 51 | MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128)); | ||
| 52 | MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128)); | ||
| 53 | MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128)); | ||
| 54 | MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128)); | ||
| 55 | MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128)); | ||
| 56 | MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128)); | ||
| 57 | |||
| 58 | namespace { | ||
| 59 | |||
| 60 | constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::ShaderType::Compute); | ||
| 61 | |||
| 62 | vk::Viewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) { | ||
| 63 | const auto& viewport = regs.viewport_transform[index]; | ||
| 64 | const float x = viewport.translate_x - viewport.scale_x; | ||
| 65 | const float y = viewport.translate_y - viewport.scale_y; | ||
| 66 | const float width = viewport.scale_x * 2.0f; | ||
| 67 | const float height = viewport.scale_y * 2.0f; | ||
| 68 | |||
| 69 | const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
| 70 | float near = viewport.translate_z - viewport.scale_z * reduce_z; | ||
| 71 | float far = viewport.translate_z + viewport.scale_z; | ||
| 72 | if (!device.IsExtDepthRangeUnrestrictedSupported()) { | ||
| 73 | near = std::clamp(near, 0.0f, 1.0f); | ||
| 74 | far = std::clamp(far, 0.0f, 1.0f); | ||
| 75 | } | ||
| 76 | |||
| 77 | return vk::Viewport(x, y, width != 0 ? width : 1.0f, height != 0 ? height : 1.0f, near, far); | ||
| 78 | } | ||
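Maxwell encodes a viewport as a center (translate) and half-extent (scale) pair, which the function above unpacks into Vulkan's corner-plus-size form. A minimal standalone check of that unpacking, using made-up register values (the names below are hypothetical, not the actual regs):

    #include <cassert>

    int main() {
        // A 1280x720 viewport anchored at the origin is encoded as
        // translate = extent / 2 and scale = extent / 2.
        const float translate_x = 640.0f, scale_x = 640.0f;
        const float x = translate_x - scale_x; // left edge: 0
        const float width = scale_x * 2.0f;    // 1280
        assert(x == 0.0f && width == 1280.0f);
    }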
| 79 | |||
| 80 | constexpr vk::Rect2D GetScissorState(const Maxwell& regs, std::size_t index) { | ||
| 81 | const auto& scissor = regs.scissor_test[index]; | ||
| 82 | if (!scissor.enable) { | ||
| 83 | return {{0, 0}, {INT32_MAX, INT32_MAX}}; | ||
| 84 | } | ||
| 85 | const u32 width = scissor.max_x - scissor.min_x; | ||
| 86 | const u32 height = scissor.max_y - scissor.min_y; | ||
| 87 | return {{static_cast<s32>(scissor.min_x), static_cast<s32>(scissor.min_y)}, {width, height}}; | ||
| 88 | } | ||
| 89 | |||
| 90 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( | ||
| 91 | const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) { | ||
| 92 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; | ||
| 93 | for (std::size_t i = 0; i < std::size(addresses); ++i) { | ||
| 94 | addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; | ||
| 95 | } | ||
| 96 | return addresses; | ||
| 97 | } | ||
| 98 | |||
| 99 | void TransitionImages(const std::vector<ImageView>& views, vk::PipelineStageFlags pipeline_stage, | ||
| 100 | vk::AccessFlags access) { | ||
| 101 | for (auto& [view, layout] : views) { | ||
| 102 | view->Transition(*layout, pipeline_stage, access); | ||
| 103 | } | ||
| 104 | } | ||
| 105 | |||
| 106 | template <typename Engine, typename Entry> | ||
| 107 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, | ||
| 108 | std::size_t stage) { | ||
| 109 | const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); | ||
| 110 | if (entry.IsBindless()) { | ||
| 111 | const Tegra::Texture::TextureHandle tex_handle = | ||
| 112 | engine.AccessConstBuffer32(stage_type, entry.GetBuffer(), entry.GetOffset()); | ||
| 113 | return engine.GetTextureInfo(tex_handle); | ||
| 114 | } | ||
| 115 | if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { | ||
| 116 | return engine.GetStageTexture(stage_type, entry.GetOffset()); | ||
| 117 | } else { | ||
| 118 | return engine.GetTexture(entry.GetOffset()); | ||
| 119 | } | ||
| 120 | } | ||
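For the bindless path, the 32-bit value read from the const buffer packs both descriptor-table indices. A sketch of the assumed layout of Tegra::Texture::TextureHandle (defined elsewhere in the tree, not in this diff):

    #include <cstdint>

    struct TextureHandle {
        std::uint32_t raw;
        // Texture (TIC) index in the low 20 bits, sampler (TSC) index above.
        std::uint32_t TicId() const { return raw & 0xFFFFF; }
        std::uint32_t TscId() const { return raw >> 20; }
    };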
| 121 | |||
| 122 | } // Anonymous namespace | ||
| 123 | |||
| 124 | class BufferBindings final { | ||
| 125 | public: | ||
| 126 | void AddVertexBinding(const vk::Buffer* buffer, vk::DeviceSize offset) { | ||
| 127 | vertex.buffer_ptrs[vertex.num_buffers] = buffer; | ||
| 128 | vertex.offsets[vertex.num_buffers] = offset; | ||
| 129 | ++vertex.num_buffers; | ||
| 130 | } | ||
| 131 | |||
| 132 | void SetIndexBinding(const vk::Buffer* buffer, vk::DeviceSize offset, vk::IndexType type) { | ||
| 133 | index.buffer = buffer; | ||
| 134 | index.offset = offset; | ||
| 135 | index.type = type; | ||
| 136 | } | ||
| 137 | |||
| 138 | void Bind(VKScheduler& scheduler) const { | ||
| 139 | // Use this large switch case to avoid capturing more memory in the record lambda than
| 140 | // what we need. It looks horrible, but it's the best we can do in standard C++.
| 141 | switch (vertex.num_buffers) { | ||
| 142 | case 0: | ||
| 143 | return BindStatic<0>(scheduler); | ||
| 144 | case 1: | ||
| 145 | return BindStatic<1>(scheduler); | ||
| 146 | case 2: | ||
| 147 | return BindStatic<2>(scheduler); | ||
| 148 | case 3: | ||
| 149 | return BindStatic<3>(scheduler); | ||
| 150 | case 4: | ||
| 151 | return BindStatic<4>(scheduler); | ||
| 152 | case 5: | ||
| 153 | return BindStatic<5>(scheduler); | ||
| 154 | case 6: | ||
| 155 | return BindStatic<6>(scheduler); | ||
| 156 | case 7: | ||
| 157 | return BindStatic<7>(scheduler); | ||
| 158 | case 8: | ||
| 159 | return BindStatic<8>(scheduler); | ||
| 160 | case 9: | ||
| 161 | return BindStatic<9>(scheduler); | ||
| 162 | case 10: | ||
| 163 | return BindStatic<10>(scheduler); | ||
| 164 | case 11: | ||
| 165 | return BindStatic<11>(scheduler); | ||
| 166 | case 12: | ||
| 167 | return BindStatic<12>(scheduler); | ||
| 168 | case 13: | ||
| 169 | return BindStatic<13>(scheduler); | ||
| 170 | case 14: | ||
| 171 | return BindStatic<14>(scheduler); | ||
| 172 | case 15: | ||
| 173 | return BindStatic<15>(scheduler); | ||
| 174 | case 16: | ||
| 175 | return BindStatic<16>(scheduler); | ||
| 176 | case 17: | ||
| 177 | return BindStatic<17>(scheduler); | ||
| 178 | case 18: | ||
| 179 | return BindStatic<18>(scheduler); | ||
| 180 | case 19: | ||
| 181 | return BindStatic<19>(scheduler); | ||
| 182 | case 20: | ||
| 183 | return BindStatic<20>(scheduler); | ||
| 184 | case 21: | ||
| 185 | return BindStatic<21>(scheduler); | ||
| 186 | case 22: | ||
| 187 | return BindStatic<22>(scheduler); | ||
| 188 | case 23: | ||
| 189 | return BindStatic<23>(scheduler); | ||
| 190 | case 24: | ||
| 191 | return BindStatic<24>(scheduler); | ||
| 192 | case 25: | ||
| 193 | return BindStatic<25>(scheduler); | ||
| 194 | case 26: | ||
| 195 | return BindStatic<26>(scheduler); | ||
| 196 | case 27: | ||
| 197 | return BindStatic<27>(scheduler); | ||
| 198 | case 28: | ||
| 199 | return BindStatic<28>(scheduler); | ||
| 200 | case 29: | ||
| 201 | return BindStatic<29>(scheduler); | ||
| 202 | case 30: | ||
| 203 | return BindStatic<30>(scheduler); | ||
| 204 | case 31: | ||
| 205 | return BindStatic<31>(scheduler); | ||
| 206 | case 32: | ||
| 207 | return BindStatic<32>(scheduler); | ||
| 208 | } | ||
| 209 | UNREACHABLE(); | ||
| 210 | } | ||
| 211 | |||
| 212 | private: | ||
| 213 | // Some of these fields are intentionally left uninitialized to avoid initializing them twice. | ||
| 214 | struct { | ||
| 215 | std::size_t num_buffers = 0; | ||
| 216 | std::array<const vk::Buffer*, Maxwell::NumVertexArrays> buffer_ptrs; | ||
| 217 | std::array<vk::DeviceSize, Maxwell::NumVertexArrays> offsets; | ||
| 218 | } vertex; | ||
| 219 | |||
| 220 | struct { | ||
| 221 | const vk::Buffer* buffer = nullptr; | ||
| 222 | vk::DeviceSize offset; | ||
| 223 | vk::IndexType type; | ||
| 224 | } index; | ||
| 225 | |||
| 226 | template <std::size_t N> | ||
| 227 | void BindStatic(VKScheduler& scheduler) const { | ||
| 228 | if (index.buffer != nullptr) { | ||
| 229 | BindStatic<N, true>(scheduler); | ||
| 230 | } else { | ||
| 231 | BindStatic<N, false>(scheduler); | ||
| 232 | } | ||
| 233 | } | ||
| 234 | |||
| 235 | template <std::size_t N, bool is_indexed> | ||
| 236 | void BindStatic(VKScheduler& scheduler) const { | ||
| 237 | static_assert(N <= Maxwell::NumVertexArrays); | ||
| 238 | if constexpr (N == 0) { | ||
| 239 | return; | ||
| 240 | } | ||
| 241 | |||
| 242 | std::array<vk::Buffer, N> buffers; | ||
| 243 | std::transform(vertex.buffer_ptrs.begin(), vertex.buffer_ptrs.begin() + N, buffers.begin(), | ||
| 244 | [](const auto ptr) { return *ptr; }); | ||
| 245 | |||
| 246 | std::array<vk::DeviceSize, N> offsets; | ||
| 247 | std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin()); | ||
| 248 | |||
| 249 | if constexpr (is_indexed) { | ||
| 250 | // Indexed draw | ||
| 251 | scheduler.Record([buffers, offsets, index_buffer = *index.buffer, | ||
| 252 | index_offset = index.offset, | ||
| 253 | index_type = index.type](auto cmdbuf, auto& dld) { | ||
| 254 | cmdbuf.bindIndexBuffer(index_buffer, index_offset, index_type, dld); | ||
| 255 | cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(), | ||
| 256 | dld); | ||
| 257 | }); | ||
| 258 | } else { | ||
| 259 | // Array draw | ||
| 260 | scheduler.Record([buffers, offsets](auto cmdbuf, auto& dld) { | ||
| 261 | cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(), | ||
| 262 | dld); | ||
| 263 | }); | ||
| 264 | } | ||
| 265 | } | ||
| 266 | }; | ||
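The switch above exists to turn a runtime buffer count into the compile-time constant N, so each record lambda captures exactly N-sized std::arrays by value. A sketch (not part of the commit) of the same runtime-to-compile-time dispatch generated with std::index_sequence instead of 33 hand-written cases, where Bind<N> stands in for BindStatic<N>:

    #include <array>
    #include <cstddef>
    #include <cstdio>
    #include <utility>

    template <std::size_t N>
    void Bind() {
        std::printf("binding %zu vertex buffers\n", N); // stand-in for BindStatic<N>
    }

    // Builds a table of function pointers {&Bind<0>, ..., &Bind<32>}.
    template <std::size_t... Is>
    constexpr auto MakeBindTable(std::index_sequence<Is...>) {
        return std::array{&Bind<Is>...};
    }

    void BindDynamic(std::size_t num_buffers) {
        static constexpr auto table = MakeBindTable(std::make_index_sequence<33>{});
        table[num_buffers](); // num_buffers must be <= 32
    }

    int main() {
        BindDynamic(3);
    }

A function-pointer table trades the switch's verbosity for an indirect call; the hand-written switch lets the compiler see and inline each BindStatic<N> instantiation directly.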
| 267 | |||
| 268 | void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf, | ||
| 269 | const vk::DispatchLoaderDynamic& dld) const { | ||
| 270 | if (is_indexed) { | ||
| 271 | cmdbuf.drawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance, dld); | ||
| 272 | } else { | ||
| 273 | cmdbuf.draw(num_vertices, num_instances, base_vertex, base_instance, dld); | ||
| 274 | } | ||
| 275 | } | ||
| 276 | |||
| 277 | RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer, | ||
| 278 | VKScreenInfo& screen_info, const VKDevice& device, | ||
| 279 | VKResourceManager& resource_manager, | ||
| 280 | VKMemoryManager& memory_manager, VKScheduler& scheduler) | ||
| 281 | : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer}, | ||
| 282 | screen_info{screen_info}, device{device}, resource_manager{resource_manager}, | ||
| 283 | memory_manager{memory_manager}, scheduler{scheduler}, | ||
| 284 | staging_pool(device, memory_manager, scheduler), descriptor_pool(device), | ||
| 285 | update_descriptor_queue(device, scheduler), | ||
| 286 | quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||
| 287 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||
| 288 | texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, | ||
| 289 | staging_pool), | ||
| 290 | pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), | ||
| 291 | buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), | ||
| 292 | sampler_cache(device) {} | ||
| 293 | |||
| 294 | RasterizerVulkan::~RasterizerVulkan() = default; | ||
| 295 | |||
| 296 | bool RasterizerVulkan::DrawBatch(bool is_indexed) { | ||
| 297 | Draw(is_indexed, false); | ||
| 298 | return true; | ||
| 299 | } | ||
| 300 | |||
| 301 | bool RasterizerVulkan::DrawMultiBatch(bool is_indexed) { | ||
| 302 | Draw(is_indexed, true); | ||
| 303 | return true; | ||
| 304 | } | ||
| 305 | |||
| 306 | void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | ||
| 307 | MICROPROFILE_SCOPE(Vulkan_Drawing); | ||
| 308 | |||
| 309 | FlushWork(); | ||
| 310 | |||
| 311 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 312 | GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)}; | ||
| 313 | |||
| 314 | buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed)); | ||
| 315 | |||
| 316 | BufferBindings buffer_bindings; | ||
| 317 | const DrawParameters draw_params = | ||
| 318 | SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced); | ||
| 319 | |||
| 320 | update_descriptor_queue.Acquire(); | ||
| 321 | sampled_views.clear(); | ||
| 322 | image_views.clear(); | ||
| 323 | |||
| 324 | const auto shaders = pipeline_cache.GetShaders(); | ||
| 325 | key.shaders = GetShaderAddresses(shaders); | ||
| 326 | SetupShaderDescriptors(shaders); | ||
| 327 | |||
| 328 | buffer_cache.Unmap(); | ||
| 329 | |||
| 330 | const auto texceptions = UpdateAttachments(); | ||
| 331 | SetupImageTransitions(texceptions, color_attachments, zeta_attachment); | ||
| 332 | |||
| 333 | key.renderpass_params = GetRenderPassParams(texceptions); | ||
| 334 | |||
| 335 | auto& pipeline = pipeline_cache.GetGraphicsPipeline(key); | ||
| 336 | scheduler.BindGraphicsPipeline(pipeline.GetHandle()); | ||
| 337 | |||
| 338 | const auto renderpass = pipeline.GetRenderPass(); | ||
| 339 | const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); | ||
| 340 | scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr}); | ||
| 341 | |||
| 342 | UpdateDynamicStates(); | ||
| 343 | |||
| 344 | buffer_bindings.Bind(scheduler); | ||
| 345 | |||
| 346 | if (device.IsNvDeviceDiagnosticCheckpoints()) { | ||
| 347 | scheduler.Record( | ||
| 348 | [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); | ||
| 349 | } | ||
| 350 | |||
| 351 | const auto pipeline_layout = pipeline.GetLayout(); | ||
| 352 | const auto descriptor_set = pipeline.CommitDescriptorSet(); | ||
| 353 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { | ||
| 354 | if (descriptor_set) { | ||
| 355 | cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipeline_layout, | ||
| 356 | DESCRIPTOR_SET, 1, &descriptor_set, 0, nullptr, dld); | ||
| 357 | } | ||
| 358 | draw_params.Draw(cmdbuf, dld); | ||
| 359 | }); | ||
| 360 | } | ||
| 361 | |||
| 362 | void RasterizerVulkan::Clear() { | ||
| 363 | MICROPROFILE_SCOPE(Vulkan_Clearing); | ||
| 364 | |||
| 365 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 366 | if (!gpu.ShouldExecute()) {
| 367 | return; | ||
| 368 | } | ||
| 369 | |||
| 370 | const auto& regs = gpu.regs; | ||
| 371 | const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || | ||
| 372 | regs.clear_buffers.A; | ||
| 373 | const bool use_depth = regs.clear_buffers.Z; | ||
| 374 | const bool use_stencil = regs.clear_buffers.S; | ||
| 375 | if (!use_color && !use_depth && !use_stencil) { | ||
| 376 | return; | ||
| 377 | } | ||
| 378 | // Clearing images requires being outside of a renderpass
| 379 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 380 | |||
| 381 | // TODO(Rodrigo): Implement clears by rendering a quad or by beginning a renderpass.
| 382 | |||
| 383 | if (use_color) { | ||
| 384 | View color_view; | ||
| 385 | { | ||
| 386 | MICROPROFILE_SCOPE(Vulkan_RenderTargets); | ||
| 387 | color_view = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT.Value(), false); | ||
| 388 | } | ||
| 389 | |||
| 390 | color_view->Transition(vk::ImageLayout::eTransferDstOptimal, | ||
| 391 | vk::PipelineStageFlagBits::eTransfer, | ||
| 392 | vk::AccessFlagBits::eTransferWrite); | ||
| 393 | |||
| 394 | const std::array clear_color = {regs.clear_color[0], regs.clear_color[1], | ||
| 395 | regs.clear_color[2], regs.clear_color[3]}; | ||
| 396 | const vk::ClearColorValue clear(clear_color); | ||
| 397 | scheduler.Record([image = color_view->GetImage(), | ||
| 398 | subresource = color_view->GetImageSubresourceRange(), | ||
| 399 | clear](auto cmdbuf, auto& dld) { | ||
| 400 | cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear, subresource, | ||
| 401 | dld); | ||
| 402 | }); | ||
| 403 | } | ||
| 404 | if (use_depth || use_stencil) { | ||
| 405 | View zeta_surface; | ||
| 406 | { | ||
| 407 | MICROPROFILE_SCOPE(Vulkan_RenderTargets); | ||
| 408 | zeta_surface = texture_cache.GetDepthBufferSurface(false); | ||
| 409 | } | ||
| 410 | |||
| 411 | zeta_surface->Transition(vk::ImageLayout::eTransferDstOptimal, | ||
| 412 | vk::PipelineStageFlagBits::eTransfer, | ||
| 413 | vk::AccessFlagBits::eTransferWrite); | ||
| 414 | |||
| 415 | const vk::ClearDepthStencilValue clear(regs.clear_depth, | ||
| 416 | static_cast<u32>(regs.clear_stencil)); | ||
| 417 | scheduler.Record([image = zeta_surface->GetImage(), | ||
| 418 | subresource = zeta_surface->GetImageSubresourceRange(), | ||
| 419 | clear](auto cmdbuf, auto& dld) { | ||
| 420 | cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, clear, | ||
| 421 | subresource, dld); | ||
| 422 | }); | ||
| 423 | } | ||
| 424 | } | ||
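A sketch of the TODO above: clearing inside the active renderpass with vkCmdClearAttachments instead of transitioning the image to TransferDstOptimal. This is a hypothetical fragment, not self-contained; it reuses the `scheduler`, `clear` and a `render_area` from the surrounding code and assumes color attachment 0 is the target:

    const vk::ClearAttachment attachment(vk::ImageAspectFlagBits::eColor, 0, clear);
    const vk::ClearRect rect(vk::Rect2D({0, 0}, render_area), 0, 1);
    scheduler.Record([attachment, rect](auto cmdbuf, auto& dld) {
        cmdbuf.clearAttachments(1, &attachment, 1, &rect, dld);
    });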
| 425 | |||
| 426 | void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | ||
| 427 | MICROPROFILE_SCOPE(Vulkan_Compute); | ||
| 428 | update_descriptor_queue.Acquire(); | ||
| 429 | sampled_views.clear(); | ||
| 430 | image_views.clear(); | ||
| 431 | |||
| 432 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||
| 433 | const ComputePipelineCacheKey key{ | ||
| 434 | code_addr, | ||
| 435 | launch_desc.shared_alloc, | ||
| 436 | {launch_desc.block_dim_x, launch_desc.block_dim_y, launch_desc.block_dim_z}}; | ||
| 437 | auto& pipeline = pipeline_cache.GetComputePipeline(key); | ||
| 438 | |||
| 439 | // Compute dispatches can't be executed inside a renderpass | ||
| 440 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 441 | |||
| 442 | buffer_cache.Map(CalculateComputeStreamBufferSize()); | ||
| 443 | |||
| 444 | const auto& entries = pipeline.GetEntries(); | ||
| 445 | SetupComputeConstBuffers(entries); | ||
| 446 | SetupComputeGlobalBuffers(entries); | ||
| 447 | SetupComputeTexelBuffers(entries); | ||
| 448 | SetupComputeTextures(entries); | ||
| 449 | SetupComputeImages(entries); | ||
| 450 | |||
| 451 | buffer_cache.Unmap(); | ||
| 452 | |||
| 453 | TransitionImages(sampled_views, vk::PipelineStageFlagBits::eComputeShader, | ||
| 454 | vk::AccessFlagBits::eShaderRead); | ||
| 455 | TransitionImages(image_views, vk::PipelineStageFlagBits::eComputeShader, | ||
| 456 | vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite); | ||
| 457 | |||
| 458 | if (device.IsNvDeviceDiagnosticCheckpoints()) { | ||
| 459 | scheduler.Record( | ||
| 460 | [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); });
| 461 | } | ||
| 462 | |||
| 463 | scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, | ||
| 464 | grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(), | ||
| 465 | layout = pipeline.GetLayout(), | ||
| 466 | descriptor_set = pipeline.CommitDescriptorSet()](auto cmdbuf, auto& dld) { | ||
| 467 | cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline_handle, dld); | ||
| 468 | cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, DESCRIPTOR_SET, 1, | ||
| 469 | &descriptor_set, 0, nullptr, dld); | ||
| 470 | cmdbuf.dispatch(grid_x, grid_y, grid_z, dld); | ||
| 471 | }); | ||
| 472 | } | ||
| 473 | |||
| 474 | void RasterizerVulkan::FlushAll() {} | ||
| 475 | |||
| 476 | void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { | ||
| 477 | texture_cache.FlushRegion(addr, size); | ||
| 478 | buffer_cache.FlushRegion(addr, size); | ||
| 479 | } | ||
| 480 | |||
| 481 | void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { | ||
| 482 | texture_cache.InvalidateRegion(addr, size); | ||
| 483 | pipeline_cache.InvalidateRegion(addr, size); | ||
| 484 | buffer_cache.InvalidateRegion(addr, size); | ||
| 485 | } | ||
| 486 | |||
| 487 | void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | ||
| 488 | FlushRegion(addr, size); | ||
| 489 | InvalidateRegion(addr, size); | ||
| 490 | } | ||
| 491 | |||
| 492 | void RasterizerVulkan::FlushCommands() { | ||
| 493 | if (draw_counter > 0) { | ||
| 494 | draw_counter = 0; | ||
| 495 | scheduler.Flush(); | ||
| 496 | } | ||
| 497 | } | ||
| 498 | |||
| 499 | void RasterizerVulkan::TickFrame() { | ||
| 500 | draw_counter = 0; | ||
| 501 | update_descriptor_queue.TickFrame(); | ||
| 502 | buffer_cache.TickFrame(); | ||
| 503 | staging_pool.TickFrame(); | ||
| 504 | } | ||
| 505 | |||
| 506 | bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | ||
| 507 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | ||
| 508 | const Tegra::Engines::Fermi2D::Config& copy_config) { | ||
| 509 | texture_cache.DoFermiCopy(src, dst, copy_config); | ||
| 510 | return true; | ||
| 511 | } | ||
| 512 | |||
| 513 | bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | ||
| 514 | VAddr framebuffer_addr, u32 pixel_stride) { | ||
| 515 | if (!framebuffer_addr) { | ||
| 516 | return false; | ||
| 517 | } | ||
| 518 | |||
| 519 | const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)}; | ||
| 520 | const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)}; | ||
| 521 | if (!surface) { | ||
| 522 | return false; | ||
| 523 | } | ||
| 524 | |||
| 525 | // Verify that the cached surface is the same size and format as the requested framebuffer | ||
| 526 | const auto& params{surface->GetSurfaceParams()}; | ||
| 527 | const auto& pixel_format{ | ||
| 528 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; | ||
| 529 | ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); | ||
| 530 | ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); | ||
| 531 | |||
| 532 | screen_info.image = &surface->GetImage(); | ||
| 533 | screen_info.width = params.width; | ||
| 534 | screen_info.height = params.height; | ||
| 535 | screen_info.is_srgb = surface->GetSurfaceParams().srgb_conversion; | ||
| 536 | return true; | ||
| 537 | } | ||
| 538 | |||
| 539 | void RasterizerVulkan::FlushWork() { | ||
| 540 | static constexpr u32 DRAWS_TO_DISPATCH = 4096; | ||
| 541 | |||
| 542 | // Only check the counter every 8 draws
| 543 | static_assert(DRAWS_TO_DISPATCH % 8 == 0); | ||
| 544 | if ((++draw_counter & 7) != 7) { | ||
| 545 | return; | ||
| 546 | } | ||
| 547 | |||
| 548 | if (draw_counter < DRAWS_TO_DISPATCH) { | ||
| 549 | // Send recorded tasks to the worker thread | ||
| 550 | scheduler.DispatchWork(); | ||
| 551 | return; | ||
| 552 | } | ||
| 553 | |||
| 554 | // Otherwise (every DRAWS_TO_DISPATCH draws) flush execution.
| 555 | // This submits commands to the Vulkan driver. | ||
| 556 | scheduler.Flush(); | ||
| 557 | draw_counter = 0; | ||
| 558 | } | ||
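A quick standalone check of the masking logic above: with draw_counter starting at zero, (++draw_counter & 7) == 7 fires on every eighth call (& 7 is a cheap modulo 8), and the full flush happens once the counter reaches DRAWS_TO_DISPATCH:

    #include <cstdio>

    int main() {
        unsigned draw_counter = 0;
        for (int draw = 1; draw <= 32; ++draw) {
            if ((++draw_counter & 7) == 7) {
                std::printf("draw %d: dispatch to worker\n", draw); // draws 7, 15, 23, 31
            }
        }
    }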
| 559 | |||
| 560 | RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { | ||
| 561 | MICROPROFILE_SCOPE(Vulkan_RenderTargets); | ||
| 562 | auto& dirty = system.GPU().Maxwell3D().dirty; | ||
| 563 | const bool update_rendertargets = dirty.render_settings; | ||
| 564 | dirty.render_settings = false; | ||
| 565 | |||
| 566 | texture_cache.GuardRenderTargets(true); | ||
| 567 | |||
| 568 | Texceptions texceptions; | ||
| 569 | for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { | ||
| 570 | if (update_rendertargets) { | ||
| 571 | color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true); | ||
| 572 | } | ||
| 573 | if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { | ||
| 574 | texceptions.set(rt); | ||
| 575 | } | ||
| 576 | } | ||
| 577 | |||
| 578 | if (update_rendertargets) { | ||
| 579 | zeta_attachment = texture_cache.GetDepthBufferSurface(true); | ||
| 580 | } | ||
| 581 | if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { | ||
| 582 | texceptions.set(ZETA_TEXCEPTION_INDEX); | ||
| 583 | } | ||
| 584 | |||
| 585 | texture_cache.GuardRenderTargets(false); | ||
| 586 | |||
| 587 | return texceptions; | ||
| 588 | } | ||
| 589 | |||
| 590 | bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachment) { | ||
| 591 | bool overlap = false; | ||
| 592 | for (auto& [view, layout] : sampled_views) { | ||
| 593 | if (!attachment.IsSameSurface(*view)) { | ||
| 594 | continue; | ||
| 595 | } | ||
| 596 | overlap = true; | ||
| 597 | *layout = vk::ImageLayout::eGeneral; | ||
| 598 | } | ||
| 599 | return overlap; | ||
| 600 | } | ||
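"Texceptions" (texture exceptions) is a bitset with one bit per color target plus one for the zeta buffer; a set bit means that attachment is also sampled as a texture in the same draw, so it must stay in vk::ImageLayout::eGeneral rather than the optimal attachment layout. A minimal illustration of the bookkeeping (the constants are assumptions, not taken from this diff):

    #include <bitset>
    #include <cstddef>

    constexpr std::size_t NumRenderTargets = 8;
    constexpr std::size_t ZETA_TEXCEPTION_INDEX = NumRenderTargets;
    using Texceptions = std::bitset<NumRenderTargets + 1>;

    // True when any attachment participates in a feedback loop this draw.
    bool HasFeedbackLoop(const Texceptions& texceptions) {
        return texceptions.any();
    }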
| 601 | |||
| 602 | std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffers( | ||
| 603 | vk::RenderPass renderpass) { | ||
| 604 | FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(), | ||
| 605 | std::numeric_limits<u32>::max()}; | ||
| 606 | |||
| 607 | const auto MarkAsModifiedAndPush = [&](const View& view) { | ||
| 608 | if (view == nullptr) { | ||
| 609 | return false; | ||
| 610 | } | ||
| 611 | key.views.push_back(view->GetHandle()); | ||
| 612 | key.width = std::min(key.width, view->GetWidth()); | ||
| 613 | key.height = std::min(key.height, view->GetHeight()); | ||
| 614 | return true; | ||
| 615 | }; | ||
| 616 | |||
| 617 | for (std::size_t index = 0; index < std::size(color_attachments); ++index) { | ||
| 618 | if (MarkAsModifiedAndPush(color_attachments[index])) { | ||
| 619 | texture_cache.MarkColorBufferInUse(index); | ||
| 620 | } | ||
| 621 | } | ||
| 622 | if (MarkAsModifiedAndPush(zeta_attachment)) { | ||
| 623 | texture_cache.MarkDepthBufferInUse(); | ||
| 624 | } | ||
| 625 | |||
| 626 | const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); | ||
| 627 | auto& framebuffer = fbentry->second; | ||
| 628 | if (is_cache_miss) { | ||
| 629 | const vk::FramebufferCreateInfo framebuffer_ci({}, key.renderpass, | ||
| 630 | static_cast<u32>(key.views.size()), | ||
| 631 | key.views.data(), key.width, key.height, 1); | ||
| 632 | const auto dev = device.GetLogical(); | ||
| 633 | const auto& dld = device.GetDispatchLoader(); | ||
| 634 | framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld); | ||
| 635 | } | ||
| 636 | |||
| 637 | return {*framebuffer, vk::Extent2D{key.width, key.height}}; | ||
| 638 | } | ||
| 639 | |||
| 640 | RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, | ||
| 641 | BufferBindings& buffer_bindings, | ||
| 642 | bool is_indexed, | ||
| 643 | bool is_instanced) { | ||
| 644 | MICROPROFILE_SCOPE(Vulkan_Geometry); | ||
| 645 | |||
| 646 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 647 | const auto& regs = gpu.regs; | ||
| 648 | |||
| 649 | SetupVertexArrays(fixed_state.vertex_input, buffer_bindings); | ||
| 650 | |||
| 651 | const u32 base_instance = regs.vb_base_instance; | ||
| 652 | const u32 num_instances = is_instanced ? gpu.mme_draw.instance_count : 1; | ||
| 653 | const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first; | ||
| 654 | const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count; | ||
| 655 | |||
| 656 | DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed}; | ||
| 657 | SetupIndexBuffer(buffer_bindings, params, is_indexed); | ||
| 658 | |||
| 659 | return params; | ||
| 660 | } | ||
| 661 | |||
| 662 | void RasterizerVulkan::SetupShaderDescriptors( | ||
| 663 | const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) { | ||
| 664 | texture_cache.GuardSamplers(true); | ||
| 665 | |||
| 666 | for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 667 | // Skip VertexA stage | ||
| 668 | const auto& shader = shaders[stage + 1]; | ||
| 669 | if (!shader) { | ||
| 670 | continue; | ||
| 671 | } | ||
| 672 | const auto& entries = shader->GetEntries(); | ||
| 673 | SetupGraphicsConstBuffers(entries, stage); | ||
| 674 | SetupGraphicsGlobalBuffers(entries, stage); | ||
| 675 | SetupGraphicsTexelBuffers(entries, stage); | ||
| 676 | SetupGraphicsTextures(entries, stage); | ||
| 677 | SetupGraphicsImages(entries, stage); | ||
| 678 | } | ||
| 679 | texture_cache.GuardSamplers(false); | ||
| 680 | } | ||
| 681 | |||
| 682 | void RasterizerVulkan::SetupImageTransitions( | ||
| 683 | Texceptions texceptions, const std::array<View, Maxwell::NumRenderTargets>& color_attachments, | ||
| 684 | const View& zeta_attachment) { | ||
| 685 | TransitionImages(sampled_views, vk::PipelineStageFlagBits::eAllGraphics, | ||
| 686 | vk::AccessFlagBits::eShaderRead); | ||
| 687 | TransitionImages(image_views, vk::PipelineStageFlagBits::eAllGraphics, | ||
| 688 | vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite); | ||
| 689 | |||
| 690 | for (std::size_t rt = 0; rt < std::size(color_attachments); ++rt) { | ||
| 691 | const auto color_attachment = color_attachments[rt]; | ||
| 692 | if (color_attachment == nullptr) { | ||
| 693 | continue; | ||
| 694 | } | ||
| 695 | const auto image_layout = | ||
| 696 | texceptions[rt] ? vk::ImageLayout::eGeneral : vk::ImageLayout::eColorAttachmentOptimal; | ||
| 697 | color_attachment->Transition( | ||
| 698 | image_layout, vk::PipelineStageFlagBits::eColorAttachmentOutput, | ||
| 699 | vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite); | ||
| 700 | } | ||
| 701 | |||
| 702 | if (zeta_attachment != nullptr) { | ||
| 703 | const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX] | ||
| 704 | ? vk::ImageLayout::eGeneral | ||
| 705 | : vk::ImageLayout::eDepthStencilAttachmentOptimal; | ||
| 706 | zeta_attachment->Transition(image_layout, vk::PipelineStageFlagBits::eLateFragmentTests, | ||
| 707 | vk::AccessFlagBits::eDepthStencilAttachmentRead | | ||
| 708 | vk::AccessFlagBits::eDepthStencilAttachmentWrite); | ||
| 709 | } | ||
| 710 | } | ||
| 711 | |||
| 712 | void RasterizerVulkan::UpdateDynamicStates() { | ||
| 713 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 714 | UpdateViewportsState(gpu); | ||
| 715 | UpdateScissorsState(gpu); | ||
| 716 | UpdateDepthBias(gpu); | ||
| 717 | UpdateBlendConstants(gpu); | ||
| 718 | UpdateDepthBounds(gpu); | ||
| 719 | UpdateStencilFaces(gpu); | ||
| 720 | } | ||
| 721 | |||
| 722 | void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, | ||
| 723 | BufferBindings& buffer_bindings) { | ||
| 724 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 725 | |||
| 726 | for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexAttributes); ++index) { | ||
| 727 | const auto& attrib = regs.vertex_attrib_format[index]; | ||
| 728 | if (!attrib.IsValid()) { | ||
| 729 | continue; | ||
| 730 | } | ||
| 731 | |||
| 732 | const auto& buffer = regs.vertex_array[attrib.buffer]; | ||
| 733 | ASSERT(buffer.IsEnabled()); | ||
| 734 | |||
| 735 | vertex_input.attributes[vertex_input.num_attributes++] = | ||
| 736 | FixedPipelineState::VertexAttribute(index, attrib.buffer, attrib.type, attrib.size, | ||
| 737 | attrib.offset); | ||
| 738 | } | ||
| 739 | |||
| 740 | for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexArrays); ++index) { | ||
| 741 | const auto& vertex_array = regs.vertex_array[index]; | ||
| 742 | if (!vertex_array.IsEnabled()) { | ||
| 743 | continue; | ||
| 744 | } | ||
| 745 | |||
| 746 | const GPUVAddr start{vertex_array.StartAddress()}; | ||
| 747 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; | ||
| 748 | |||
| 749 | ASSERT(end > start); | ||
| 750 | const std::size_t size{end - start + 1}; | ||
| 751 | const auto [buffer, offset] = buffer_cache.UploadMemory(start, size); | ||
| 752 | |||
| 753 | vertex_input.bindings[vertex_input.num_bindings++] = FixedPipelineState::VertexBinding( | ||
| 754 | index, vertex_array.stride, | ||
| 755 | regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0); | ||
| 756 | buffer_bindings.AddVertexBinding(buffer, offset); | ||
| 757 | } | ||
| 758 | } | ||
| 759 | |||
| 760 | void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, | ||
| 761 | bool is_indexed) { | ||
| 762 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 763 | switch (regs.draw.topology) { | ||
| 764 | case Maxwell::PrimitiveTopology::Quads: | ||
| 765 | if (params.is_indexed) { | ||
| 766 | UNIMPLEMENTED(); | ||
| 767 | } else { | ||
| 768 | const auto [buffer, offset] = | ||
| 769 | quad_array_pass.Assemble(params.num_vertices, params.base_vertex); | ||
| 770 | buffer_bindings.SetIndexBinding(&buffer, offset, vk::IndexType::eUint32); | ||
| 771 | params.base_vertex = 0; | ||
| 772 | params.num_vertices = params.num_vertices * 6 / 4; | ||
| 773 | params.is_indexed = true; | ||
| 774 | } | ||
| 775 | break; | ||
| 776 | default: { | ||
| 777 | if (!is_indexed) { | ||
| 778 | break; | ||
| 779 | } | ||
| 780 | const GPUVAddr gpu_addr = regs.index_array.IndexStart(); | ||
| 781 | auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); | ||
| 782 | |||
| 783 | auto format = regs.index_array.format; | ||
| 784 | const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; | ||
| 785 | if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) { | ||
| 786 | std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, *buffer, offset); | ||
| 787 | format = Maxwell::IndexFormat::UnsignedShort; | ||
| 788 | } | ||
| 789 | |||
| 790 | buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format)); | ||
| 791 | break; | ||
| 792 | } | ||
| 793 | } | ||
| 794 | } | ||
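For quad topology, the quad_array_pass compute pass is expected to emit the standard two-triangle split, which is why the vertex count above grows by a factor of 6/4. A CPU-side sketch of that expansion (the real pass runs on the GPU and its winding may differ):

    #include <cstdint>
    #include <vector>

    std::vector<std::uint32_t> ExpandQuads(std::uint32_t num_vertices) {
        std::vector<std::uint32_t> indices;
        indices.reserve(num_vertices * 6 / 4);
        // Each quad (v, v+1, v+2, v+3) becomes triangles (v, v+1, v+2) and (v, v+2, v+3).
        for (std::uint32_t v = 0; v < num_vertices; v += 4) {
            for (const std::uint32_t offset : {0u, 1u, 2u, 0u, 2u, 3u}) {
                indices.push_back(v + offset);
            }
        }
        return indices;
    }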
| 795 | |||
| 796 | void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) { | ||
| 797 | MICROPROFILE_SCOPE(Vulkan_ConstBuffers); | ||
| 798 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 799 | const auto& shader_stage = gpu.state.shader_stages[stage]; | ||
| 800 | for (const auto& entry : entries.const_buffers) { | ||
| 801 | SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]); | ||
| 802 | } | ||
| 803 | } | ||
| 804 | |||
| 805 | void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) { | ||
| 806 | MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); | ||
| 807 | auto& gpu{system.GPU()}; | ||
| 808 | const auto& cbufs{gpu.Maxwell3D().state.shader_stages[stage]};
| 809 | |||
| 810 | for (const auto& entry : entries.global_buffers) { | ||
| 811 | const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset(); | ||
| 812 | SetupGlobalBuffer(entry, addr); | ||
| 813 | } | ||
| 814 | } | ||
| 815 | |||
| 816 | void RasterizerVulkan::SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage) { | ||
| 817 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 818 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 819 | for (const auto& entry : entries.texel_buffers) { | ||
| 820 | const auto image = GetTextureInfo(gpu, entry, stage).tic; | ||
| 821 | SetupTexelBuffer(image, entry); | ||
| 822 | } | ||
| 823 | } | ||
| 824 | |||
| 825 | void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) { | ||
| 826 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 827 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 828 | for (const auto& entry : entries.samplers) { | ||
| 829 | const auto texture = GetTextureInfo(gpu, entry, stage); | ||
| 830 | SetupTexture(texture, entry); | ||
| 831 | } | ||
| 832 | } | ||
| 833 | |||
| 834 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { | ||
| 835 | MICROPROFILE_SCOPE(Vulkan_Images); | ||
| 836 | const auto& gpu = system.GPU().Maxwell3D();
| 837 | for (const auto& entry : entries.images) { | ||
| 838 | const auto tic = GetTextureInfo(gpu, entry, stage).tic; | ||
| 839 | SetupImage(tic, entry); | ||
| 840 | } | ||
| 841 | } | ||
| 842 | |||
| 843 | void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) { | ||
| 844 | MICROPROFILE_SCOPE(Vulkan_ConstBuffers); | ||
| 845 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||
| 846 | for (const auto& entry : entries.const_buffers) { | ||
| 847 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | ||
| 848 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); | ||
| 849 | Tegra::Engines::ConstBufferInfo buffer; | ||
| 850 | buffer.address = config.Address(); | ||
| 851 | buffer.size = config.size; | ||
| 852 | buffer.enabled = mask[entry.GetIndex()]; | ||
| 853 | SetupConstBuffer(entry, buffer); | ||
| 854 | } | ||
| 855 | } | ||
| 856 | |||
| 857 | void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { | ||
| 858 | MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); | ||
| 859 | const auto& cbufs{system.GPU().KeplerCompute().launch_description.const_buffer_config};
| 860 | for (const auto& entry : entries.global_buffers) { | ||
| 861 | const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; | ||
| 862 | SetupGlobalBuffer(entry, addr); | ||
| 863 | } | ||
| 864 | } | ||
| 865 | |||
| 866 | void RasterizerVulkan::SetupComputeTexelBuffers(const ShaderEntries& entries) { | ||
| 867 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 868 | const auto& gpu = system.GPU().KeplerCompute(); | ||
| 869 | for (const auto& entry : entries.texel_buffers) { | ||
| 870 | const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; | ||
| 871 | SetupTexelBuffer(image, entry); | ||
| 872 | } | ||
| 873 | } | ||
| 874 | |||
| 875 | void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { | ||
| 876 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 877 | const auto& gpu = system.GPU().KeplerCompute(); | ||
| 878 | for (const auto& entry : entries.samplers) { | ||
| 879 | const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex); | ||
| 880 | SetupTexture(texture, entry); | ||
| 881 | } | ||
| 882 | } | ||
| 883 | |||
| 884 | void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { | ||
| 885 | MICROPROFILE_SCOPE(Vulkan_Images); | ||
| 886 | const auto& gpu = system.GPU().KeplerCompute(); | ||
| 887 | for (const auto& entry : entries.images) { | ||
| 888 | const auto tic = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; | ||
| 889 | SetupImage(tic, entry); | ||
| 890 | } | ||
| 891 | } | ||
| 892 | |||
| 893 | void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, | ||
| 894 | const Tegra::Engines::ConstBufferInfo& buffer) { | ||
| 895 | // Align the size to avoid bad std140 interactions | ||
| 896 | const std::size_t size = | ||
| 897 | Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); | ||
| 898 | ASSERT(size <= MaxConstbufferSize); | ||
| 899 | |||
| 900 | const auto [buffer_handle, offset] = | ||
| 901 | buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); | ||
| 902 | |||
| 903 | update_descriptor_queue.AddBuffer(buffer_handle, offset, size); | ||
| 904 | } | ||
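Common::AlignUp here rounds the size up to a 16-byte (vec4) multiple, matching std140's rule that a uniform block is padded to a multiple of its base alignment. A sketch of the helper's assumed behavior:

    #include <cstddef>

    constexpr std::size_t AlignUp(std::size_t value, std::size_t alignment) {
        return (value + alignment - 1) / alignment * alignment;
    }
    static_assert(AlignUp(33, 16) == 48);
    static_assert(AlignUp(48, 16) == 48);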
| 905 | |||
| 906 | void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { | ||
| 907 | auto& memory_manager{system.GPU().MemoryManager()}; | ||
| 908 | const auto actual_addr = memory_manager.Read<u64>(address); | ||
| 909 | const auto size = memory_manager.Read<u32>(address + 8); | ||
| 910 | |||
| 911 | if (size == 0) { | ||
| 912 | // Sometimes global memory pointers don't have a proper size. Upload a dummy entry because | ||
| 913 | // Vulkan doesn't like empty buffers. | ||
| 914 | constexpr std::size_t dummy_size = 4; | ||
| 915 | const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size); | ||
| 916 | update_descriptor_queue.AddBuffer(buffer, 0, dummy_size); | ||
| 917 | return; | ||
| 918 | } | ||
| 919 | |||
| 920 | const auto [buffer, offset] = buffer_cache.UploadMemory( | ||
| 921 | actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); | ||
| 922 | update_descriptor_queue.AddBuffer(buffer, offset, size); | ||
| 923 | } | ||
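The two reads above imply the layout of the global-memory descriptor that shaders store in a const buffer: a 64-bit GPU address followed by a 32-bit size. As a struct (illustrative, not a type from the tree):

    #include <cstdint>

    struct GlobalMemoryDescriptor {
        std::uint64_t gpu_address; // memory_manager.Read<u64>(address)
        std::uint32_t size;        // memory_manager.Read<u32>(address + 8)
    };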
| 924 | |||
| 925 | void RasterizerVulkan::SetupTexelBuffer(const Tegra::Texture::TICEntry& tic, | ||
| 926 | const TexelBufferEntry& entry) { | ||
| 927 | const auto view = texture_cache.GetTextureSurface(tic, entry); | ||
| 928 | ASSERT(view->IsBufferView()); | ||
| 929 | |||
| 930 | update_descriptor_queue.AddTexelBuffer(view->GetBufferView()); | ||
| 931 | } | ||
| 932 | |||
| 933 | void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& texture, | ||
| 934 | const SamplerEntry& entry) { | ||
| 935 | auto view = texture_cache.GetTextureSurface(texture.tic, entry); | ||
| 936 | ASSERT(!view->IsBufferView()); | ||
| 937 | |||
| 938 | const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source, | ||
| 939 | texture.tic.z_source, texture.tic.w_source); | ||
| 940 | const auto sampler = sampler_cache.GetSampler(texture.tsc); | ||
| 941 | update_descriptor_queue.AddSampledImage(sampler, image_view); | ||
| 942 | |||
| 943 | const auto image_layout = update_descriptor_queue.GetLastImageLayout(); | ||
| 944 | *image_layout = vk::ImageLayout::eShaderReadOnlyOptimal; | ||
| 945 | sampled_views.push_back(ImageView{std::move(view), image_layout}); | ||
| 946 | } | ||
| 947 | |||
| 948 | void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) { | ||
| 949 | auto view = texture_cache.GetImageSurface(tic, entry); | ||
| 950 | |||
| 951 | if (entry.IsWritten()) { | ||
| 952 | view->MarkAsModified(texture_cache.Tick()); | ||
| 953 | } | ||
| 954 | |||
| 955 | UNIMPLEMENTED_IF(tic.IsBuffer()); | ||
| 956 | |||
| 957 | const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source); | ||
| 958 | update_descriptor_queue.AddImage(image_view); | ||
| 959 | |||
| 960 | const auto image_layout = update_descriptor_queue.GetLastImageLayout(); | ||
| 961 | *image_layout = vk::ImageLayout::eGeneral; | ||
| 962 | image_views.push_back(ImageView{std::move(view), image_layout}); | ||
| 963 | } | ||
| 964 | |||
| 965 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu) { | ||
| 966 | if (!gpu.dirty.viewport_transform && scheduler.TouchViewports()) { | ||
| 967 | return; | ||
| 968 | } | ||
| 969 | gpu.dirty.viewport_transform = false; | ||
| 970 | const auto& regs = gpu.regs; | ||
| 971 | const std::array viewports{ | ||
| 972 | GetViewportState(device, regs, 0), GetViewportState(device, regs, 1), | ||
| 973 | GetViewportState(device, regs, 2), GetViewportState(device, regs, 3), | ||
| 974 | GetViewportState(device, regs, 4), GetViewportState(device, regs, 5), | ||
| 975 | GetViewportState(device, regs, 6), GetViewportState(device, regs, 7), | ||
| 976 | GetViewportState(device, regs, 8), GetViewportState(device, regs, 9), | ||
| 977 | GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), | ||
| 978 | GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), | ||
| 979 | GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)}; | ||
| 980 | scheduler.Record([viewports](auto cmdbuf, auto& dld) { | ||
| 981 | cmdbuf.setViewport(0, static_cast<u32>(viewports.size()), viewports.data(), dld); | ||
| 982 | }); | ||
| 983 | } | ||
| 984 | |||
| 985 | void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu) { | ||
| 986 | if (!gpu.dirty.scissor_test && scheduler.TouchScissors()) { | ||
| 987 | return; | ||
| 988 | } | ||
| 989 | gpu.dirty.scissor_test = false; | ||
| 990 | const auto& regs = gpu.regs; | ||
| 991 | const std::array scissors = { | ||
| 992 | GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), | ||
| 993 | GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), | ||
| 994 | GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8), | ||
| 995 | GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), | ||
| 996 | GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), | ||
| 997 | GetScissorState(regs, 15)}; | ||
| 998 | scheduler.Record([scissors](auto cmdbuf, auto& dld) { | ||
| 999 | cmdbuf.setScissor(0, static_cast<u32>(scissors.size()), scissors.data(), dld); | ||
| 1000 | }); | ||
| 1001 | } | ||
| 1002 | |||
| 1003 | void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu) { | ||
| 1004 | if (!gpu.dirty.polygon_offset && scheduler.TouchDepthBias()) { | ||
| 1005 | return; | ||
| 1006 | } | ||
| 1007 | gpu.dirty.polygon_offset = false; | ||
| 1008 | const auto& regs = gpu.regs; | ||
| 1009 | scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp, | ||
| 1010 | factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) { | ||
| 1011 | cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld); | ||
| 1012 | }); | ||
| 1013 | } | ||
| 1014 | |||
| 1015 | void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu) { | ||
| 1016 | if (!gpu.dirty.blend_state && scheduler.TouchBlendConstants()) { | ||
| 1017 | return; | ||
| 1018 | } | ||
| 1019 | gpu.dirty.blend_state = false; | ||
| 1020 | const std::array blend_color = {gpu.regs.blend_color.r, gpu.regs.blend_color.g, | ||
| 1021 | gpu.regs.blend_color.b, gpu.regs.blend_color.a}; | ||
| 1022 | scheduler.Record([blend_color](auto cmdbuf, auto& dld) { | ||
| 1023 | cmdbuf.setBlendConstants(blend_color.data(), dld); | ||
| 1024 | }); | ||
| 1025 | } | ||
| 1026 | |||
| 1027 | void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu) { | ||
| 1028 | if (!gpu.dirty.depth_bounds_values && scheduler.TouchDepthBounds()) { | ||
| 1029 | return; | ||
| 1030 | } | ||
| 1031 | gpu.dirty.depth_bounds_values = false; | ||
| 1032 | const auto& regs = gpu.regs; | ||
| 1033 | scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]]( | ||
| 1034 | auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); }); | ||
| 1035 | } | ||
| 1036 | |||
| 1037 | void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu) { | ||
| 1038 | if (!gpu.dirty.stencil_test && scheduler.TouchStencilValues()) { | ||
| 1039 | return; | ||
| 1040 | } | ||
| 1041 | gpu.dirty.stencil_test = false; | ||
| 1042 | const auto& regs = gpu.regs; | ||
| 1043 | if (regs.stencil_two_side_enable) { | ||
| 1044 | // Separate values per face | ||
| 1045 | scheduler.Record( | ||
| 1046 | [front_ref = regs.stencil_front_func_ref, front_write_mask = regs.stencil_front_mask, | ||
| 1047 | front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_func_ref, | ||
| 1048 | back_write_mask = regs.stencil_back_mask, | ||
| 1049 | back_test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) { | ||
| 1050 | // Front face | ||
| 1051 | cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFront, front_ref, dld); | ||
| 1052 | cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFront, front_write_mask, dld); | ||
| 1053 | cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFront, front_test_mask, dld); | ||
| 1054 | |||
| 1055 | // Back face | ||
| 1056 | cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eBack, back_ref, dld); | ||
| 1057 | cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eBack, back_write_mask, dld); | ||
| 1058 | cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eBack, back_test_mask, dld); | ||
| 1059 | }); | ||
| 1060 | } else { | ||
| 1061 | // Front face defines both faces | ||
| 1062 | scheduler.Record([ref = regs.stencil_back_func_ref, write_mask = regs.stencil_back_mask, | ||
| 1063 | test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) { | ||
| 1064 | cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, ref, dld); | ||
| 1065 | cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, write_mask, dld); | ||
| 1066 | cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, test_mask, dld); | ||
| 1067 | }); | ||
| 1068 | } | ||
| 1069 | } | ||
| 1070 | |||
| 1071 | std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { | ||
| 1072 | std::size_t size = CalculateVertexArraysSize(); | ||
| 1073 | if (is_indexed) { | ||
| 1074 | size = Common::AlignUp(size, 4) + CalculateIndexBufferSize(); | ||
| 1075 | } | ||
| 1076 | size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); | ||
| 1077 | return size; | ||
| 1078 | } | ||
| 1079 | |||
| 1080 | std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { | ||
| 1081 | return Tegra::Engines::KeplerCompute::NumConstBuffers * | ||
| 1082 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | ||
| 1083 | } | ||
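As a rough upper bound for the reservation above, assuming (values from the engine headers, not this diff) KeplerCompute::NumConstBuffers == 8, MaxConstBufferSize == 64 KiB and a 256-byte uniform buffer alignment:

    8 * (65536 + 256) = 526336 bytes, i.e. about 514 KiB per dispatch.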
| 1084 | |||
| 1085 | std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { | ||
| 1086 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 1087 | |||
| 1088 | std::size_t size = 0; | ||
| 1089 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 1090 | // This implementation assumes that all attributes are used in the shader. | ||
| 1091 | const GPUVAddr start{regs.vertex_array[index].StartAddress()}; | ||
| 1092 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; | ||
| 1093 | DEBUG_ASSERT(end > start); | ||
| 1094 | |||
| 1095 | size += (end - start + 1) * regs.vertex_array[index].enable; | ||
| 1096 | } | ||
| 1097 | return size; | ||
| 1098 | } | ||
| 1099 | |||
| 1100 | std::size_t RasterizerVulkan::CalculateIndexBufferSize() const { | ||
| 1101 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 1102 | return static_cast<std::size_t>(regs.index_array.count) * | ||
| 1103 | static_cast<std::size_t>(regs.index_array.FormatSizeInBytes()); | ||
| 1104 | } | ||
| 1105 | |||
| 1106 | std::size_t RasterizerVulkan::CalculateConstBufferSize( | ||
| 1107 | const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const { | ||
| 1108 | if (entry.IsIndirect()) { | ||
| 1109 | // Buffer is accessed indirectly, so upload the entire thing | ||
| 1110 | return buffer.size; | ||
| 1111 | } else { | ||
| 1112 | // Buffer is accessed directly, upload just what we use | ||
| 1113 | return entry.GetSize(); | ||
| 1114 | } | ||
| 1115 | } | ||
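A quick illustration of the two cases, with hypothetical shader accesses not taken from this commit:

    // Direct: the shader reads only fixed offsets, e.g. c[2][0x40] -> entry.GetSize() suffices.
    // Indirect: the offset is computed at run time, e.g. c[2][r0] -> the full buffer.size must be bound.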
| 1116 | |||
| 1117 | RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const { | ||
| 1118 | using namespace VideoCore::Surface; | ||
| 1119 | |||
| 1120 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 1121 | RenderPassParams renderpass_params; | ||
| 1122 | |||
| 1123 | for (std::size_t rt = 0; rt < static_cast<std::size_t>(regs.rt_control.count); ++rt) { | ||
| 1124 | const auto& rendertarget = regs.rt[rt]; | ||
| 1125 | if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) | ||
| 1126 | continue; | ||
| 1127 | renderpass_params.color_attachments.push_back(RenderPassParams::ColorAttachment{ | ||
| 1128 | static_cast<u32>(rt), PixelFormatFromRenderTargetFormat(rendertarget.format), | ||
| 1129 | texceptions.test(rt)}); | ||
| 1130 | } | ||
| 1131 | |||
| 1132 | renderpass_params.has_zeta = regs.zeta_enable; | ||
| 1133 | if (renderpass_params.has_zeta) { | ||
| 1134 | renderpass_params.zeta_pixel_format = PixelFormatFromDepthFormat(regs.zeta.format); | ||
| 1135 | renderpass_params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX]; | ||
| 1136 | } | ||
| 1137 | |||
| 1138 | return renderpass_params; | ||
| 1139 | } | ||
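A "texception" appears to mark an attachment that is simultaneously sampled as a texture during the pass, which the render pass parameters must account for. The bitset reserves one bit per color target plus bit 8 (ZETA_TEXCEPTION_INDEX) for the depth-stencil buffer; a hypothetical usage sketch:

    Texceptions texceptions;
    texceptions.set(2);                      // color target 2 is also sampled this draw
    texceptions.set(ZETA_TEXCEPTION_INDEX);  // the zeta buffer is also sampled this draw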
| 1140 | |||
| 1141 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index fc324952b..7be71e734 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -4,10 +4,260 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 8 | #include <bitset> | ||
| 9 | #include <memory> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include <boost/container/static_vector.hpp> | ||
| 14 | #include <boost/functional/hash.hpp> | ||
| 15 | |||
| 16 | #include "common/common_types.h" | ||
| 17 | #include "video_core/memory_manager.h" | ||
| 18 | #include "video_core/rasterizer_accelerated.h" | ||
| 7 | #include "video_core/rasterizer_interface.h" | 19 | #include "video_core/rasterizer_interface.h" |
| 20 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 21 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | ||
| 22 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 23 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||
| 24 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||
| 25 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | ||
| 26 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||
| 27 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||
| 28 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||
| 29 | #include "video_core/renderer_vulkan/vk_sampler_cache.h" | ||
| 30 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 31 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||
| 32 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 33 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 34 | |||
| 35 | namespace Core { | ||
| 36 | class System; | ||
| 37 | } | ||
| 38 | |||
| 39 | namespace Core::Frontend { | ||
| 40 | class EmuWindow; | ||
| 41 | } | ||
| 42 | |||
| 43 | namespace Tegra::Engines { | ||
| 44 | class Maxwell3D; | ||
| 45 | } | ||
| 46 | |||
| 47 | namespace Vulkan { | ||
| 48 | |||
| 49 | struct VKScreenInfo; | ||
| 50 | |||
| 51 | using ImageViewsPack = | ||
| 52 | boost::container::static_vector<vk::ImageView, Maxwell::NumRenderTargets + 1>; | ||
| 53 | |||
| 54 | struct FramebufferCacheKey { | ||
| 55 | vk::RenderPass renderpass{}; | ||
| 56 | u32 width = 0; | ||
| 57 | u32 height = 0; | ||
| 58 | ImageViewsPack views; | ||
| 59 | |||
| 60 | std::size_t Hash() const noexcept { | ||
| 61 | std::size_t hash = 0; | ||
| 62 | boost::hash_combine(hash, static_cast<VkRenderPass>(renderpass)); | ||
| 63 | for (const auto& view : views) { | ||
| 64 | boost::hash_combine(hash, static_cast<VkImageView>(view)); | ||
| 65 | } | ||
| 66 | boost::hash_combine(hash, width); | ||
| 67 | boost::hash_combine(hash, height); | ||
| 68 | return hash; | ||
| 69 | } | ||
| 70 | |||
| 71 | bool operator==(const FramebufferCacheKey& rhs) const noexcept { | ||
| 72 | return std::tie(renderpass, views, width, height) == | ||
| 73 | std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height); | ||
| 74 | } | ||
| 75 | }; | ||
| 76 | |||
| 77 | } // namespace Vulkan | ||
| 78 | |||
| 79 | namespace std { | ||
| 80 | |||
| 81 | template <> | ||
| 82 | struct hash<Vulkan::FramebufferCacheKey> { | ||
| 83 | std::size_t operator()(const Vulkan::FramebufferCacheKey& k) const noexcept { | ||
| 84 | return k.Hash(); | ||
| 85 | } | ||
| 86 | }; | ||
| 87 | |||
| 88 | } // namespace std | ||
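Hash() and operator== above, together with this std::hash specialization, are what let FramebufferCacheKey key the unordered_map declared as framebuffer_cache further down. A hypothetical lookup (the real one lives in ConfigureFramebuffers) could look like:

    // Create the framebuffer only on a cache miss; otherwise reuse the cached handle.
    const auto [it, is_cache_miss] = framebuffer_cache.try_emplace(std::move(key));
    if (is_cache_miss) {
        // CreateFramebuffer is a hypothetical helper building a vk::UniqueFramebuffer
        // from the key's renderpass, views, width and height.
        it->second = CreateFramebuffer(it->first);
    }
    const vk::Framebuffer framebuffer = *it->second;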
| 8 | 89 | ||
| 9 | namespace Vulkan { | 90 | namespace Vulkan { |
| 10 | 91 | ||
| 11 | class RasterizerVulkan : public VideoCore::RasterizerInterface {}; | 92 | class BufferBindings; |
| 93 | |||
| 94 | struct ImageView { | ||
| 95 | View view; | ||
| 96 | vk::ImageLayout* layout = nullptr; | ||
| 97 | }; | ||
| 98 | |||
| 99 | class RasterizerVulkan : public VideoCore::RasterizerAccelerated { | ||
| 100 | public: | ||
| 101 | explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, | ||
| 102 | VKScreenInfo& screen_info, const VKDevice& device, | ||
| 103 | VKResourceManager& resource_manager, VKMemoryManager& memory_manager, | ||
| 104 | VKScheduler& scheduler); | ||
| 105 | ~RasterizerVulkan() override; | ||
| 106 | |||
| 107 | bool DrawBatch(bool is_indexed) override; | ||
| 108 | bool DrawMultiBatch(bool is_indexed) override; | ||
| 109 | void Clear() override; | ||
| 110 | void DispatchCompute(GPUVAddr code_addr) override; | ||
| 111 | void FlushAll() override; | ||
| 112 | void FlushRegion(CacheAddr addr, u64 size) override; | ||
| 113 | void InvalidateRegion(CacheAddr addr, u64 size) override; | ||
| 114 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | ||
| 115 | void FlushCommands() override; | ||
| 116 | void TickFrame() override; | ||
| 117 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | ||
| 118 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | ||
| 119 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | ||
| 120 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | ||
| 121 | u32 pixel_stride) override; | ||
| 122 | |||
| 123 | /// Maximum supported size that a constbuffer can have in bytes. | ||
| 124 | static constexpr std::size_t MaxConstbufferSize = 0x10000; | ||
| 125 | static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, | ||
| 126 | "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); | ||
| 127 | |||
| 128 | private: | ||
| 129 | struct DrawParameters { | ||
| 130 | void Draw(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld) const; | ||
| 131 | |||
| 132 | u32 base_instance = 0; | ||
| 133 | u32 num_instances = 0; | ||
| 134 | u32 base_vertex = 0; | ||
| 135 | u32 num_vertices = 0; | ||
| 136 | bool is_indexed = false; | ||
| 137 | }; | ||
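DrawParameters::Draw itself is defined in vk_rasterizer.cpp; presumably it just forwards to the indexed or non-indexed command-buffer entry point, along these lines (a sketch):

    if (is_indexed) {
        cmdbuf.drawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance, dld);
    } else {
        cmdbuf.draw(num_vertices, num_instances, base_vertex, base_instance, dld);
    }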
| 138 | |||
| 139 | using Texceptions = std::bitset<Maxwell::NumRenderTargets + 1>; | ||
| 140 | |||
| 141 | static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8; | ||
| 142 | |||
| 143 | void Draw(bool is_indexed, bool is_instanced); | ||
| 144 | |||
| 145 | void FlushWork(); | ||
| 146 | |||
| 147 | Texceptions UpdateAttachments(); | ||
| 148 | |||
| 149 | std::tuple<vk::Framebuffer, vk::Extent2D> ConfigureFramebuffers(vk::RenderPass renderpass); | ||
| 150 | |||
| 151 | /// Sets up geometry buffers and state. | ||
| 152 | DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, | ||
| 153 | bool is_indexed, bool is_instanced); | ||
| 154 | |||
| 155 | /// Setup descriptors in the graphics pipeline. | ||
| 156 | void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders); | ||
| 157 | |||
| 158 | void SetupImageTransitions(Texceptions texceptions, | ||
| 159 | const std::array<View, Maxwell::NumRenderTargets>& color_attachments, | ||
| 160 | const View& zeta_attachment); | ||
| 161 | |||
| 162 | void UpdateDynamicStates(); | ||
| 163 | |||
| 164 | bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); | ||
| 165 | |||
| 166 | void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, | ||
| 167 | BufferBindings& buffer_bindings); | ||
| 168 | |||
| 169 | void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); | ||
| 170 | |||
| 171 | /// Setup constant buffers in the graphics pipeline. | ||
| 172 | void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage); | ||
| 173 | |||
| 174 | /// Setup global buffers in the graphics pipeline. | ||
| 175 | void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage); | ||
| 176 | |||
| 177 | /// Setup texel buffers in the graphics pipeline. | ||
| 178 | void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage); | ||
| 179 | |||
| 180 | /// Setup textures in the graphics pipeline. | ||
| 181 | void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); | ||
| 182 | |||
| 183 | /// Setup images in the graphics pipeline. | ||
| 184 | void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); | ||
| 185 | |||
| 186 | /// Setup constant buffers in the compute pipeline. | ||
| 187 | void SetupComputeConstBuffers(const ShaderEntries& entries); | ||
| 188 | |||
| 189 | /// Setup global buffers in the compute pipeline. | ||
| 190 | void SetupComputeGlobalBuffers(const ShaderEntries& entries); | ||
| 191 | |||
| 192 | /// Setup texel buffers in the compute pipeline. | ||
| 193 | void SetupComputeTexelBuffers(const ShaderEntries& entries); | ||
| 194 | |||
| 195 | /// Setup textures in the compute pipeline. | ||
| 196 | void SetupComputeTextures(const ShaderEntries& entries); | ||
| 197 | |||
| 198 | /// Setup images in the compute pipeline. | ||
| 199 | void SetupComputeImages(const ShaderEntries& entries); | ||
| 200 | |||
| 201 | void SetupConstBuffer(const ConstBufferEntry& entry, | ||
| 202 | const Tegra::Engines::ConstBufferInfo& buffer); | ||
| 203 | |||
| 204 | void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); | ||
| 205 | |||
| 206 | void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry); | ||
| 207 | |||
| 208 | void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry); | ||
| 209 | |||
| 210 | void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); | ||
| 211 | |||
| 212 | void UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu); | ||
| 213 | void UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu); | ||
| 214 | void UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu); | ||
| 215 | void UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu); | ||
| 216 | void UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu); | ||
| 217 | void UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu); | ||
| 218 | |||
| 219 | std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; | ||
| 220 | |||
| 221 | std::size_t CalculateComputeStreamBufferSize() const; | ||
| 222 | |||
| 223 | std::size_t CalculateVertexArraysSize() const; | ||
| 224 | |||
| 225 | std::size_t CalculateIndexBufferSize() const; | ||
| 226 | |||
| 227 | std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry, | ||
| 228 | const Tegra::Engines::ConstBufferInfo& buffer) const; | ||
| 229 | |||
| 230 | RenderPassParams GetRenderPassParams(Texceptions texceptions) const; | ||
| 231 | |||
| 232 | Core::System& system; | ||
| 233 | Core::Frontend::EmuWindow& render_window; | ||
| 234 | VKScreenInfo& screen_info; | ||
| 235 | const VKDevice& device; | ||
| 236 | VKResourceManager& resource_manager; | ||
| 237 | VKMemoryManager& memory_manager; | ||
| 238 | VKScheduler& scheduler; | ||
| 239 | |||
| 240 | VKStagingBufferPool staging_pool; | ||
| 241 | VKDescriptorPool descriptor_pool; | ||
| 242 | VKUpdateDescriptorQueue update_descriptor_queue; | ||
| 243 | QuadArrayPass quad_array_pass; | ||
| 244 | Uint8Pass uint8_pass; | ||
| 245 | |||
| 246 | VKTextureCache texture_cache; | ||
| 247 | VKPipelineCache pipeline_cache; | ||
| 248 | VKBufferCache buffer_cache; | ||
| 249 | VKSamplerCache sampler_cache; | ||
| 250 | |||
| 251 | std::array<View, Maxwell::NumRenderTargets> color_attachments; | ||
| 252 | View zeta_attachment; | ||
| 253 | |||
| 254 | std::vector<ImageView> sampled_views; | ||
| 255 | std::vector<ImageView> image_views; | ||
| 256 | |||
| 257 | u32 draw_counter = 0; | ||
| 258 | |||
| 259 | // TODO(Rodrigo): Invalidate on image destruction | ||
| 260 | std::unordered_map<FramebufferCacheKey, UniqueFramebuffer> framebuffer_cache; | ||
| 261 | }; | ||
| 12 | 262 | ||
| 13 | } // namespace Vulkan | 263 | } // namespace Vulkan |