8 files changed, 129 insertions, 126 deletions
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index be1c31978..a7f256ff9
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -140,6 +140,12 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size
     enable.Assign(1);
 }
 
+void FixedPipelineState::Fill(const Maxwell& regs) {
+    rasterizer.Fill(regs);
+    depth_stencil.Fill(regs);
+    color_blending.Fill(regs);
+}
+
 std::size_t FixedPipelineState::Hash() const noexcept {
     const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
     return static_cast<std::size_t>(hash);
@@ -149,15 +155,6 @@ bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcep
     return std::memcmp(this, &rhs, sizeof *this) == 0;
 }
 
-FixedPipelineState GetFixedPipelineState(const Maxwell& regs) {
-    FixedPipelineState fixed_state;
-    fixed_state.rasterizer.Fill(regs);
-    fixed_state.depth_stencil.Fill(regs);
-    fixed_state.color_blending.Fill(regs);
-    fixed_state.padding = {};
-    return fixed_state;
-}
-
 u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept {
     // OpenGL enums go from 0x200 to 0x207 and the others from 1 to 8
     // If we substract 0x200 to OpenGL enums and 1 to the others we get a 0-7 range.
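Note: with Fill() promoted to a member and the free function GetFixedPipelineState() removed, the state object hashes and compares itself as raw bytes. The snippet below is a standalone sketch of that pattern, not code from this patch; "ExampleKey" is hypothetical, and it assumes it is compiled inside the yuzu tree so that common/cityhash.h provides Common::CityHash64.

// Minimal sketch of the byte-wise hash/compare pattern used above, assuming a
// struct with no padding bytes (ExampleKey is illustrative only).
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>
#include "common/cityhash.h"

struct ExampleKey {
    std::uint32_t a;
    std::uint32_t b;

    std::size_t Hash() const noexcept {
        // Hash every byte of the object; sound only because there are no padding bytes.
        return static_cast<std::size_t>(
            Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
    }

    bool operator==(const ExampleKey& rhs) const noexcept {
        // Byte-wise equality matches value equality only when every byte is meaningful.
        return std::memcmp(this, &rhs, sizeof *this) == 0;
    }
};
static_assert(std::has_unique_object_representations_v<ExampleKey>);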
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index 9a950f4de..77188b862
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -17,7 +17,7 @@ namespace Vulkan {
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
-struct alignas(32) FixedPipelineState {
+struct FixedPipelineState {
     static u32 PackComparisonOp(Maxwell::ComparisonOp op) noexcept;
     static Maxwell::ComparisonOp UnpackComparisonOp(u32 packed) noexcept;
 
@@ -237,7 +237,8 @@ struct alignas(32) FixedPipelineState {
     Rasterizer rasterizer;
     DepthStencil depth_stencil;
     ColorBlending color_blending;
-    std::array<u8, 20> padding;
+
+    void Fill(const Maxwell& regs);
 
     std::size_t Hash() const noexcept;
 
@@ -250,9 +251,6 @@ struct alignas(32) FixedPipelineState {
static_assert(std::has_unique_object_representations_v<FixedPipelineState>);
static_assert(std::is_trivially_copyable_v<FixedPipelineState>);
static_assert(std::is_trivially_constructible_v<FixedPipelineState>);
-static_assert(sizeof(FixedPipelineState) % 32 == 0, "Size is not aligned");
-
-FixedPipelineState GetFixedPipelineState(const Maxwell& regs);
 
 } // namespace Vulkan
 
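Note: the alignas(32) and the manual 20-byte padding array existed only to make the byte-wise hash well defined; they are no longer needed because std::has_unique_object_representations_v already rejects any layout that contains implicit padding. The following is an illustration of what that static_assert enforces, not code from the patch.

// Illustration only: the trait distinguishes a struct with implicit padding
// (unsafe to memcmp/hash) from one where every byte participates in the value.
#include <cstdint>
#include <type_traits>

struct Padded {
    std::uint8_t a;
    // Three implicit padding bytes are inserted here, so hashing or memcmp'ing
    // the object would read indeterminate bytes.
    std::uint32_t b;
};
static_assert(!std::has_unique_object_representations_v<Padded>);

struct Packed {
    std::uint8_t a;
    std::uint8_t pad[3]; // explicit padding keeps every byte meaningful
    std::uint32_t b;
};
static_assert(std::has_unique_object_representations_v<Packed>);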
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 8332b42aa..45bd1fc6c
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -288,7 +288,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
     depth_stencil_ci.maxDepthBounds = 0.0f;
 
     std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
-    const std::size_t num_attachments = renderpass_params.color_attachments.size();
+    const auto num_attachments = static_cast<std::size_t>(renderpass_params.num_color_attachments);
     for (std::size_t index = 0; index < num_attachments; ++index) {
         static constexpr std::array COMPONENT_TABLE = {
             VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT,
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 91b1b16a5..e6d4adc92
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -161,6 +161,24 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
 
 } // Anonymous namespace
 
+std::size_t GraphicsPipelineCacheKey::Hash() const noexcept {
+    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
+    return static_cast<std::size_t>(hash);
+}
+
+bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
+    return std::memcmp(&rhs, this, sizeof *this) == 0;
+}
+
+std::size_t ComputePipelineCacheKey::Hash() const noexcept {
+    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
+    return static_cast<std::size_t>(hash);
+}
+
+bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept {
+    return std::memcmp(&rhs, this, sizeof *this) == 0;
+}
+
 CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
                            GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
                            u32 main_offset)
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 602a0a340..84d26b822
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -7,7 +7,6 @@
 #include <array>
 #include <cstddef>
 #include <memory>
-#include <tuple>
 #include <type_traits>
 #include <unordered_map>
 #include <utility>
@@ -51,42 +50,38 @@ using ProgramCode = std::vector<u64>;
 
 struct GraphicsPipelineCacheKey {
     FixedPipelineState fixed_state;
-    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
     RenderPassParams renderpass_params;
+    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
+    u64 padding; // This is necessary for unique object representations
 
-    std::size_t Hash() const noexcept {
-        std::size_t hash = fixed_state.Hash();
-        for (const auto& shader : shaders) {
-            boost::hash_combine(hash, shader);
-        }
-        boost::hash_combine(hash, renderpass_params.Hash());
-        return hash;
-    }
+    std::size_t Hash() const noexcept;
+
+    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
 
-    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
-        return std::tie(fixed_state, shaders, renderpass_params) ==
-               std::tie(rhs.fixed_state, rhs.shaders, rhs.renderpass_params);
+    bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
+        return !operator==(rhs);
     }
 };
+static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
 
 struct ComputePipelineCacheKey {
-    GPUVAddr shader{};
-    u32 shared_memory_size{};
-    std::array<u32, 3> workgroup_size{};
+    GPUVAddr shader;
+    u32 shared_memory_size;
+    std::array<u32, 3> workgroup_size;
 
-    std::size_t Hash() const noexcept {
-        return static_cast<std::size_t>(shader) ^
-               ((static_cast<std::size_t>(shared_memory_size) >> 7) << 40) ^
-               static_cast<std::size_t>(workgroup_size[0]) ^
-               (static_cast<std::size_t>(workgroup_size[1]) << 16) ^
-               (static_cast<std::size_t>(workgroup_size[2]) << 24);
-    }
+    std::size_t Hash() const noexcept;
+
+    bool operator==(const ComputePipelineCacheKey& rhs) const noexcept;
 
-    bool operator==(const ComputePipelineCacheKey& rhs) const noexcept {
-        return std::tie(shader, shared_memory_size, workgroup_size) ==
-               std::tie(rhs.shader, rhs.shared_memory_size, rhs.workgroup_size);
+    bool operator!=(const ComputePipelineCacheKey& rhs) const noexcept {
+        return !operator==(rhs);
     }
 };
+static_assert(std::has_unique_object_representations_v<ComputePipelineCacheKey>);
+static_assert(std::is_trivially_copyable_v<ComputePipelineCacheKey>);
+static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);
 
 } // namespace Vulkan
 
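Note: the explicit u64 padding member keeps GraphicsPipelineCacheKey free of implicit tail padding so the unique-object-representations assert holds, and the byte-wise Hash()/operator== make the key directly usable in a hash map. The sketch below shows the usual adapter pattern for that; it is generic and hypothetical, and the real vk_pipeline_cache.h is expected to carry an equivalent std::hash specialization of its own.

// Sketch only: forward the container's hasher to the key's own Hash().
#include <cstddef>
#include <unordered_map>

template <typename Key>
struct ForwardingHash {
    std::size_t operator()(const Key& key) const noexcept {
        return key.Hash(); // CityHash64 over the key's bytes
    }
};

// Usage (value type is whatever the cache stores; default std::equal_to uses
// the key's byte-wise operator==):
//   std::unordered_map<GraphicsPipelineCacheKey, CachedPipeline,
//                      ForwardingHash<GraphicsPipelineCacheKey>> cache;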
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 68464e637..c821b1229
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -316,7 +316,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
     query_cache.UpdateCounters();
 
     const auto& gpu = system.GPU().Maxwell3D();
-    GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)};
+    GraphicsPipelineCacheKey key;
+    key.fixed_state.Fill(gpu.regs);
 
     buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));
 
@@ -334,10 +335,11 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 
     buffer_cache.Unmap();
 
-    const auto texceptions = UpdateAttachments();
+    const Texceptions texceptions = UpdateAttachments();
     SetupImageTransitions(texceptions, color_attachments, zeta_attachment);
 
     key.renderpass_params = GetRenderPassParams(texceptions);
+    key.padding = 0;
 
     auto& pipeline = pipeline_cache.GetGraphicsPipeline(key);
     scheduler.BindGraphicsPipeline(pipeline.GetHandle());
@@ -453,10 +455,12 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
     query_cache.UpdateCounters();
 
     const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
-    const ComputePipelineCacheKey key{
-        code_addr,
-        launch_desc.shared_alloc,
-        {launch_desc.block_dim_x, launch_desc.block_dim_y, launch_desc.block_dim_z}};
+    ComputePipelineCacheKey key;
+    key.shader = code_addr;
+    key.shared_memory_size = launch_desc.shared_alloc;
+    key.workgroup_size = {launch_desc.block_dim_x, launch_desc.block_dim_y,
+                          launch_desc.block_dim_z};
+
     auto& pipeline = pipeline_cache.GetComputePipeline(key);
 
     // Compute dispatches can't be executed inside a renderpass
@@ -688,7 +692,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
     FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(),
                             std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};
 
-    const auto try_push = [&](const View& view) {
+    const auto try_push = [&key](const View& view) {
        if (!view) {
            return false;
        }
@@ -699,7 +703,9 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
        return true;
    };
 
-    for (std::size_t index = 0; index < std::size(color_attachments); ++index) {
+    const auto& regs = system.GPU().Maxwell3D().regs;
+    const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
+    for (std::size_t index = 0; index < num_attachments; ++index) {
        if (try_push(color_attachments[index])) {
            texture_cache.MarkColorBufferInUse(index);
        }
@@ -1250,28 +1256,29 @@ std::size_t RasterizerVulkan::CalculateConstBufferSize(
 }
 
 RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const {
-    using namespace VideoCore::Surface;
-
     const auto& regs = system.GPU().Maxwell3D().regs;
-    RenderPassParams renderpass_params;
+    const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
+
+    RenderPassParams params;
+    params.color_formats = {};
+    std::size_t color_texceptions = 0;
 
-    for (std::size_t rt = 0; rt < static_cast<std::size_t>(regs.rt_control.count); ++rt) {
+    std::size_t index = 0;
+    for (std::size_t rt = 0; rt < num_attachments; ++rt) {
         const auto& rendertarget = regs.rt[rt];
         if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) {
             continue;
         }
-        renderpass_params.color_attachments.push_back(RenderPassParams::ColorAttachment{
-            static_cast<u32>(rt), PixelFormatFromRenderTargetFormat(rendertarget.format),
-            texceptions[rt]});
+        params.color_formats[index] = static_cast<u8>(rendertarget.format);
+        color_texceptions |= (texceptions[rt] ? 1ULL : 0ULL) << index;
+        ++index;
     }
+    params.num_color_attachments = static_cast<u8>(index);
+    params.texceptions = static_cast<u8>(color_texceptions);
 
-    renderpass_params.has_zeta = regs.zeta_enable;
-    if (renderpass_params.has_zeta) {
-        renderpass_params.zeta_pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
-        renderpass_params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX];
-    }
-
-    return renderpass_params;
+    params.zeta_format = regs.zeta_enable ? static_cast<u8>(regs.zeta.format) : 0;
+    params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX];
+    return params;
 }
 
 VkBuffer RasterizerVulkan::DefaultBuffer() {
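Note: per-attachment "texception" flags (render targets that are also sampled as textures) are now packed into a single u8 bitmask instead of a bool inside each ColorAttachment entry. The standalone sketch below, with hypothetical values, shows the encode performed in GetRenderPassParams and the matching decode performed later in CreateRenderPass.

// Encode/decode round trip of the texceptions bitmask (illustrative values only).
#include <cstddef>
#include <cstdint>

int main() {
    // Encode: set bit `index` when attachment `index` needs the GENERAL layout.
    std::uint64_t color_texceptions = 0;
    const bool needs_general[3] = {false, true, true}; // hypothetical per-attachment flags
    for (std::size_t index = 0; index < 3; ++index) {
        color_texceptions |= (needs_general[index] ? 1ULL : 0ULL) << index;
    }
    const std::uint8_t texceptions = static_cast<std::uint8_t>(color_texceptions); // 0b110

    // Decode: the same test CreateRenderPass applies per render target.
    for (std::size_t rt = 0; rt < 3; ++rt) {
        const bool is_texception = ((texceptions >> rt) & 1) != 0; // false, true, true
        (void)is_texception;
    }
    return 0;
}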
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
index 4e5286a69..3f71d005e
--- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
@@ -2,9 +2,11 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <cstring>
 #include <memory>
 #include <vector>
 
+#include "common/cityhash.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_vulkan/maxwell_to_vk.h"
 #include "video_core/renderer_vulkan/vk_device.h"
@@ -13,6 +15,15 @@
 
 namespace Vulkan {
 
+std::size_t RenderPassParams::Hash() const noexcept {
+    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
+    return static_cast<std::size_t>(hash);
+}
+
+bool RenderPassParams::operator==(const RenderPassParams& rhs) const noexcept {
+    return std::memcmp(&rhs, this, sizeof *this) == 0;
+}
+
 VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {}
 
 VKRenderPassCache::~VKRenderPassCache() = default;
@@ -27,20 +38,22 @@ VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) {
 }
 
 vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const {
+    using namespace VideoCore::Surface;
     std::vector<VkAttachmentDescription> descriptors;
     std::vector<VkAttachmentReference> color_references;
 
-    for (std::size_t rt = 0; rt < params.color_attachments.size(); ++rt) {
-        const auto attachment = params.color_attachments[rt];
-        const auto format =
-            MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, attachment.pixel_format);
+    const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments);
+    for (std::size_t rt = 0; rt < num_attachments; ++rt) {
+        const auto guest_format = static_cast<Tegra::RenderTargetFormat>(params.color_formats[rt]);
+        const PixelFormat pixel_format = PixelFormatFromRenderTargetFormat(guest_format);
+        const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format);
         ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
-                   static_cast<u32>(attachment.pixel_format));
+                   static_cast<int>(pixel_format));
 
-        // TODO(Rodrigo): Add eMayAlias when it's needed.
-        const auto color_layout = attachment.is_texception
-                                      ? VK_IMAGE_LAYOUT_GENERAL
-                                      : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+        // TODO(Rodrigo): Add MAY_ALIAS_BIT when it's needed.
+        const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0
+                                               ? VK_IMAGE_LAYOUT_GENERAL
+                                               : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
         VkAttachmentDescription& descriptor = descriptors.emplace_back();
         descriptor.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT;
         descriptor.format = format.format;
@@ -58,15 +71,17 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParam
     }
 
     VkAttachmentReference zeta_attachment_ref;
-    if (params.has_zeta) {
-        const auto format =
-            MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.zeta_pixel_format);
+    const bool has_zeta = params.zeta_format != 0;
+    if (has_zeta) {
+        const auto guest_format = static_cast<Tegra::DepthFormat>(params.zeta_format);
+        const PixelFormat pixel_format = PixelFormatFromDepthFormat(guest_format);
+        const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format);
         ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
-                   static_cast<u32>(params.zeta_pixel_format));
+                   static_cast<int>(pixel_format));
 
-        const auto zeta_layout = params.zeta_texception
-                                     ? VK_IMAGE_LAYOUT_GENERAL
-                                     : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+        const VkImageLayout zeta_layout = params.zeta_texception != 0
+                                              ? VK_IMAGE_LAYOUT_GENERAL
+                                              : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
         VkAttachmentDescription& descriptor = descriptors.emplace_back();
         descriptor.flags = 0;
         descriptor.format = format.format;
@@ -78,7 +93,7 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParam
         descriptor.initialLayout = zeta_layout;
         descriptor.finalLayout = zeta_layout;
 
-        zeta_attachment_ref.attachment = static_cast<u32>(params.color_attachments.size());
+        zeta_attachment_ref.attachment = static_cast<u32>(num_attachments);
         zeta_attachment_ref.layout = zeta_layout;
     }
 
@@ -90,7 +105,7 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParam
     subpass_description.colorAttachmentCount = static_cast<u32>(color_references.size());
     subpass_description.pColorAttachments = color_references.data();
     subpass_description.pResolveAttachments = nullptr;
-    subpass_description.pDepthStencilAttachment = params.has_zeta ? &zeta_attachment_ref : nullptr;
+    subpass_description.pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr;
     subpass_description.preserveAttachmentCount = 0;
     subpass_description.pPreserveAttachments = nullptr;
 
@@ -101,7 +116,7 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParam
        stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    }
 
-    if (params.has_zeta) {
+    if (has_zeta) {
        access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
        stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h
index 921b6efb5..8b0fec720
--- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h
+++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h
@@ -4,8 +4,7 @@
 
 #pragma once
 
-#include <memory>
-#include <tuple>
+#include <type_traits>
 #include <unordered_map>
 
 #include <boost/container/static_vector.hpp>
@@ -19,51 +18,25 @@ namespace Vulkan {
 
 class VKDevice;
 
-// TODO(Rodrigo): Optimize this structure for faster hashing
-
 struct RenderPassParams {
-    struct ColorAttachment {
-        u32 index = 0;
-        VideoCore::Surface::PixelFormat pixel_format = VideoCore::Surface::PixelFormat::Invalid;
-        bool is_texception = false;
-
-        std::size_t Hash() const noexcept {
-            return static_cast<std::size_t>(pixel_format) |
-                   static_cast<std::size_t>(is_texception) << 6 |
-                   static_cast<std::size_t>(index) << 7;
-        }
-
-        bool operator==(const ColorAttachment& rhs) const noexcept {
-            return std::tie(index, pixel_format, is_texception) ==
-                   std::tie(rhs.index, rhs.pixel_format, rhs.is_texception);
-        }
-    };
-
-    boost::container::static_vector<ColorAttachment,
-                                    Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
-        color_attachments{};
-    // TODO(Rodrigo): Unify has_zeta into zeta_pixel_format and zeta_component_type.
-    VideoCore::Surface::PixelFormat zeta_pixel_format = VideoCore::Surface::PixelFormat::Invalid;
-    bool has_zeta = false;
-    bool zeta_texception = false;
-
-    std::size_t Hash() const noexcept {
-        std::size_t hash = 0;
-        for (const auto& rt : color_attachments) {
-            boost::hash_combine(hash, rt.Hash());
-        }
-        boost::hash_combine(hash, zeta_pixel_format);
-        boost::hash_combine(hash, has_zeta);
-        boost::hash_combine(hash, zeta_texception);
-        return hash;
-    }
+    std::array<u8, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_formats;
+    u8 num_color_attachments;
+    u8 texceptions;
+
+    u8 zeta_format;
+    u8 zeta_texception;
+
+    std::size_t Hash() const noexcept;
+
+    bool operator==(const RenderPassParams& rhs) const noexcept;
 
-    bool operator==(const RenderPassParams& rhs) const {
-        return std::tie(color_attachments, zeta_pixel_format, has_zeta, zeta_texception) ==
-               std::tie(rhs.color_attachments, rhs.zeta_pixel_format, rhs.has_zeta,
-                        rhs.zeta_texception);
+    bool operator!=(const RenderPassParams& rhs) const noexcept {
+        return !operator==(rhs);
     }
 };
+static_assert(std::has_unique_object_representations_v<RenderPassParams>);
+static_assert(std::is_trivially_copyable_v<RenderPassParams>);
+static_assert(std::is_trivially_constructible_v<RenderPassParams>);
 
 } // namespace Vulkan
 
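Note: the new RenderPassParams is a tightly packed POD of one-byte fields, which is what makes its byte-wise Hash()/operator== and the static_asserts above hold. The back-of-the-envelope check below is a standalone sketch, assuming NumRenderTargets == 8 (the actual value lives in maxwell_3d.h and is not restated in this patch).

// Layout sketch: all members have alignment 1, so there is no padding.
#include <array>
#include <cstdint>
#include <type_traits>

struct RenderPassParamsSketch {
    std::array<std::uint8_t, 8> color_formats; // one guest format byte per render target
    std::uint8_t num_color_attachments;
    std::uint8_t texceptions;                  // bit i set => attachment i uses VK_IMAGE_LAYOUT_GENERAL
    std::uint8_t zeta_format;                  // 0 means "no depth/stencil attachment"
    std::uint8_t zeta_texception;
};
static_assert(sizeof(RenderPassParamsSketch) == 12); // every byte is meaningful
static_assert(std::has_unique_object_representations_v<RenderPassParamsSketch>);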