diff options
| author | 2021-06-02 02:15:07 -0300 | |
|---|---|---|
| committer | 2021-07-22 21:51:34 -0400 | |
| commit | 4a2361a1e2271727f3259e8e4a60869165537253 (patch) | |
| tree | d741b0e808aa6a622c01dd047d66211c201e0f85 | |
| parent | transform_feedback: Read buffer stride from index instead of layout (diff) | |
| download | yuzu-4a2361a1e2271727f3259e8e4a60869165537253.tar.gz yuzu-4a2361a1e2271727f3259e8e4a60869165537253.tar.xz yuzu-4a2361a1e2271727f3259e8e4a60869165537253.zip | |
buffer_cache: Reduce uniform buffer size from shader usage
Increases performance significantly on certain titles.
Diffstat (limited to '')
11 files changed, 78 insertions, 38 deletions
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 6a5243c9f..fb2031fc8 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | |||
| @@ -560,32 +560,45 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 560 | case IR::Opcode::GetCbufU32: | 560 | case IR::Opcode::GetCbufU32: |
| 561 | case IR::Opcode::GetCbufF32: | 561 | case IR::Opcode::GetCbufF32: |
| 562 | case IR::Opcode::GetCbufU32x2: { | 562 | case IR::Opcode::GetCbufU32x2: { |
| 563 | if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) { | 563 | const IR::Value index{inst.Arg(0)}; |
| 564 | AddConstantBufferDescriptor(info, index.U32(), 1); | 564 | const IR::Value offset{inst.Arg(1)}; |
| 565 | } else { | 565 | if (!index.IsImmediate()) { |
| 566 | throw NotImplementedException("Constant buffer with non-immediate index"); | 566 | throw NotImplementedException("Constant buffer with non-immediate index"); |
| 567 | } | 567 | } |
| 568 | AddConstantBufferDescriptor(info, index.U32(), 1); | ||
| 569 | u32 element_size{}; | ||
| 568 | switch (inst.GetOpcode()) { | 570 | switch (inst.GetOpcode()) { |
| 569 | case IR::Opcode::GetCbufU8: | 571 | case IR::Opcode::GetCbufU8: |
| 570 | case IR::Opcode::GetCbufS8: | 572 | case IR::Opcode::GetCbufS8: |
| 571 | info.used_constant_buffer_types |= IR::Type::U8; | 573 | info.used_constant_buffer_types |= IR::Type::U8; |
| 574 | element_size = 1; | ||
| 572 | break; | 575 | break; |
| 573 | case IR::Opcode::GetCbufU16: | 576 | case IR::Opcode::GetCbufU16: |
| 574 | case IR::Opcode::GetCbufS16: | 577 | case IR::Opcode::GetCbufS16: |
| 575 | info.used_constant_buffer_types |= IR::Type::U16; | 578 | info.used_constant_buffer_types |= IR::Type::U16; |
| 579 | element_size = 2; | ||
| 576 | break; | 580 | break; |
| 577 | case IR::Opcode::GetCbufU32: | 581 | case IR::Opcode::GetCbufU32: |
| 578 | info.used_constant_buffer_types |= IR::Type::U32; | 582 | info.used_constant_buffer_types |= IR::Type::U32; |
| 583 | element_size = 4; | ||
| 579 | break; | 584 | break; |
| 580 | case IR::Opcode::GetCbufF32: | 585 | case IR::Opcode::GetCbufF32: |
| 581 | info.used_constant_buffer_types |= IR::Type::F32; | 586 | info.used_constant_buffer_types |= IR::Type::F32; |
| 587 | element_size = 4; | ||
| 582 | break; | 588 | break; |
| 583 | case IR::Opcode::GetCbufU32x2: | 589 | case IR::Opcode::GetCbufU32x2: |
| 584 | info.used_constant_buffer_types |= IR::Type::U32x2; | 590 | info.used_constant_buffer_types |= IR::Type::U32x2; |
| 591 | element_size = 8; | ||
| 585 | break; | 592 | break; |
| 586 | default: | 593 | default: |
| 587 | break; | 594 | break; |
| 588 | } | 595 | } |
| 596 | u32& size{info.constant_buffer_used_sizes[index.U32()]}; | ||
| 597 | if (offset.IsImmediate()) { | ||
| 598 | size = std::max(size, offset.U32() + element_size); | ||
| 599 | } else { | ||
| 600 | size = 0x10'000; | ||
| 601 | } | ||
| 589 | break; | 602 | break; |
| 590 | } | 603 | } |
| 591 | case IR::Opcode::BindlessImageSampleImplicitLod: | 604 | case IR::Opcode::BindlessImageSampleImplicitLod: |
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index d5b2ca7bc..32f8a50ea 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h | |||
| @@ -197,6 +197,7 @@ struct Info { | |||
| 197 | IR::Type used_storage_buffer_types{}; | 197 | IR::Type used_storage_buffer_types{}; |
| 198 | 198 | ||
| 199 | u32 constant_buffer_mask{}; | 199 | u32 constant_buffer_mask{}; |
| 200 | std::array<u32, MAX_CBUFS> constant_buffer_used_sizes{}; | ||
| 200 | u32 nvn_buffer_base{}; | 201 | u32 nvn_buffer_base{}; |
| 201 | std::bitset<16> nvn_buffer_used{}; | 202 | std::bitset<16> nvn_buffer_used{}; |
| 202 | 203 | ||
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d6b9eb99f..ec64f2293 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -44,6 +44,7 @@ MICROPROFILE_DECLARE(GPU_DownloadMemory); | |||
| 44 | using BufferId = SlotId; | 44 | using BufferId = SlotId; |
| 45 | 45 | ||
| 46 | using VideoCore::Surface::PixelFormat; | 46 | using VideoCore::Surface::PixelFormat; |
| 47 | using namespace Common::Literals; | ||
| 47 | 48 | ||
| 48 | constexpr u32 NUM_VERTEX_BUFFERS = 32; | 49 | constexpr u32 NUM_VERTEX_BUFFERS = 32; |
| 49 | constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; | 50 | constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; |
| @@ -53,7 +54,8 @@ constexpr u32 NUM_STORAGE_BUFFERS = 16; | |||
| 53 | constexpr u32 NUM_TEXTURE_BUFFERS = 16; | 54 | constexpr u32 NUM_TEXTURE_BUFFERS = 16; |
| 54 | constexpr u32 NUM_STAGES = 5; | 55 | constexpr u32 NUM_STAGES = 5; |
| 55 | 56 | ||
| 56 | using namespace Common::Literals; | 57 | using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>; |
| 58 | using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>; | ||
| 57 | 59 | ||
| 58 | template <typename P> | 60 | template <typename P> |
| 59 | class BufferCache { | 61 | class BufferCache { |
| @@ -142,9 +144,10 @@ public: | |||
| 142 | 144 | ||
| 143 | void BindHostComputeBuffers(); | 145 | void BindHostComputeBuffers(); |
| 144 | 146 | ||
| 145 | void SetEnabledUniformBuffers(const std::array<u32, NUM_STAGES>& mask); | 147 | void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask, |
| 148 | const UniformBufferSizes* sizes); | ||
| 146 | 149 | ||
| 147 | void SetEnabledComputeUniformBuffers(u32 enabled); | 150 | void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes); |
| 148 | 151 | ||
| 149 | void UnbindGraphicsStorageBuffers(size_t stage); | 152 | void UnbindGraphicsStorageBuffers(size_t stage); |
| 150 | 153 | ||
| @@ -384,8 +387,11 @@ private: | |||
| 384 | std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; | 387 | std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; |
| 385 | std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers; | 388 | std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers; |
| 386 | 389 | ||
| 387 | std::array<u32, NUM_STAGES> enabled_uniform_buffers{}; | 390 | std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{}; |
| 388 | u32 enabled_compute_uniform_buffers = 0; | 391 | u32 enabled_compute_uniform_buffer_mask = 0; |
| 392 | |||
| 393 | const UniformBufferSizes* uniform_buffer_sizes{}; | ||
| 394 | const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; | ||
| 389 | 395 | ||
| 390 | std::array<u32, NUM_STAGES> enabled_storage_buffers{}; | 396 | std::array<u32, NUM_STAGES> enabled_storage_buffers{}; |
| 391 | std::array<u32, NUM_STAGES> written_storage_buffers{}; | 397 | std::array<u32, NUM_STAGES> written_storage_buffers{}; |
| @@ -670,18 +676,22 @@ void BufferCache<P>::BindHostComputeBuffers() { | |||
| 670 | } | 676 | } |
| 671 | 677 | ||
| 672 | template <class P> | 678 | template <class P> |
| 673 | void BufferCache<P>::SetEnabledUniformBuffers(const std::array<u32, NUM_STAGES>& mask) { | 679 | void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask, |
| 680 | const UniformBufferSizes* sizes) { | ||
| 674 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { | 681 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { |
| 675 | if (enabled_uniform_buffers != mask) { | 682 | if (enabled_uniform_buffer_masks != mask) { |
| 676 | dirty_uniform_buffers.fill(~u32{0}); | 683 | dirty_uniform_buffers.fill(~u32{0}); |
| 677 | } | 684 | } |
| 678 | } | 685 | } |
| 679 | enabled_uniform_buffers = mask; | 686 | enabled_uniform_buffer_masks = mask; |
| 687 | uniform_buffer_sizes = sizes; | ||
| 680 | } | 688 | } |
| 681 | 689 | ||
| 682 | template <class P> | 690 | template <class P> |
| 683 | void BufferCache<P>::SetEnabledComputeUniformBuffers(u32 enabled) { | 691 | void BufferCache<P>::SetComputeUniformBufferState(u32 mask, |
| 684 | enabled_compute_uniform_buffers = enabled; | 692 | const ComputeUniformBufferSizes* sizes) { |
| 693 | enabled_compute_uniform_buffer_mask = mask; | ||
| 694 | compute_uniform_buffer_sizes = sizes; | ||
| 685 | } | 695 | } |
| 686 | 696 | ||
| 687 | template <class P> | 697 | template <class P> |
| @@ -984,7 +994,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) { | |||
| 984 | dirty = std::exchange(dirty_uniform_buffers[stage], 0); | 994 | dirty = std::exchange(dirty_uniform_buffers[stage], 0); |
| 985 | } | 995 | } |
| 986 | u32 binding_index = 0; | 996 | u32 binding_index = 0; |
| 987 | ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { | 997 | ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { |
| 988 | const bool needs_bind = ((dirty >> index) & 1) != 0; | 998 | const bool needs_bind = ((dirty >> index) & 1) != 0; |
| 989 | BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind); | 999 | BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind); |
| 990 | if constexpr (NEEDS_BIND_UNIFORM_INDEX) { | 1000 | if constexpr (NEEDS_BIND_UNIFORM_INDEX) { |
| @@ -998,7 +1008,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 998 | bool needs_bind) { | 1008 | bool needs_bind) { |
| 999 | const Binding& binding = uniform_buffers[stage][index]; | 1009 | const Binding& binding = uniform_buffers[stage][index]; |
| 1000 | const VAddr cpu_addr = binding.cpu_addr; | 1010 | const VAddr cpu_addr = binding.cpu_addr; |
| 1001 | const u32 size = binding.size; | 1011 | const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); |
| 1002 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 1012 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 1003 | TouchBuffer(buffer); | 1013 | TouchBuffer(buffer); |
| 1004 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && | 1014 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && |
| @@ -1113,11 +1123,11 @@ void BufferCache<P>::BindHostComputeUniformBuffers() { | |||
| 1113 | dirty_uniform_buffers.fill(~u32{0}); | 1123 | dirty_uniform_buffers.fill(~u32{0}); |
| 1114 | } | 1124 | } |
| 1115 | u32 binding_index = 0; | 1125 | u32 binding_index = 0; |
| 1116 | ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { | 1126 | ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { |
| 1117 | const Binding& binding = compute_uniform_buffers[index]; | 1127 | const Binding& binding = compute_uniform_buffers[index]; |
| 1118 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 1128 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 1119 | TouchBuffer(buffer); | 1129 | TouchBuffer(buffer); |
| 1120 | const u32 size = binding.size; | 1130 | const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); |
| 1121 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | 1131 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 1122 | 1132 | ||
| 1123 | const u32 offset = buffer.Offset(binding.cpu_addr); | 1133 | const u32 offset = buffer.Offset(binding.cpu_addr); |
| @@ -1261,7 +1271,7 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) { | |||
| 1261 | 1271 | ||
| 1262 | template <class P> | 1272 | template <class P> |
| 1263 | void BufferCache<P>::UpdateUniformBuffers(size_t stage) { | 1273 | void BufferCache<P>::UpdateUniformBuffers(size_t stage) { |
| 1264 | ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { | 1274 | ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { |
| 1265 | Binding& binding = uniform_buffers[stage][index]; | 1275 | Binding& binding = uniform_buffers[stage][index]; |
| 1266 | if (binding.buffer_id) { | 1276 | if (binding.buffer_id) { |
| 1267 | // Already updated | 1277 | // Already updated |
| @@ -1334,7 +1344,7 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) { | |||
| 1334 | 1344 | ||
| 1335 | template <class P> | 1345 | template <class P> |
| 1336 | void BufferCache<P>::UpdateComputeUniformBuffers() { | 1346 | void BufferCache<P>::UpdateComputeUniformBuffers() { |
| 1337 | ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { | 1347 | ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { |
| 1338 | Binding& binding = compute_uniform_buffers[index]; | 1348 | Binding& binding = compute_uniform_buffers[index]; |
| 1339 | binding = NULL_BINDING; | 1349 | binding = NULL_BINDING; |
| 1340 | const auto& launch_desc = kepler_compute.launch_description; | 1350 | const auto& launch_desc = kepler_compute.launch_description; |
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 5cf5f97a9..61b6fe4b7 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp | |||
| @@ -43,6 +43,8 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac | |||
| 43 | : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, | 43 | : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, |
| 44 | kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, | 44 | kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, |
| 45 | source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { | 45 | source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { |
| 46 | std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), | ||
| 47 | uniform_buffer_sizes.begin()); | ||
| 46 | 48 | ||
| 47 | num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors); | 49 | num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors); |
| 48 | num_image_buffers = AccumulateCount(info.image_buffer_descriptors); | 50 | num_image_buffers = AccumulateCount(info.image_buffer_descriptors); |
| @@ -63,7 +65,7 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac | |||
| 63 | } | 65 | } |
| 64 | 66 | ||
| 65 | void ComputePipeline::Configure() { | 67 | void ComputePipeline::Configure() { |
| 66 | buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); | 68 | buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes); |
| 67 | buffer_cache.UnbindComputeStorageBuffers(); | 69 | buffer_cache.UnbindComputeStorageBuffers(); |
| 68 | size_t ssbo_index{}; | 70 | size_t ssbo_index{}; |
| 69 | for (const auto& desc : info.storage_buffers_descriptors) { | 71 | for (const auto& desc : info.storage_buffers_descriptors) { |
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index dd6b62ef2..b5dfb65e9 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h | |||
| @@ -72,6 +72,7 @@ private: | |||
| 72 | Shader::Info info; | 72 | Shader::Info info; |
| 73 | OGLProgram source_program; | 73 | OGLProgram source_program; |
| 74 | OGLAssemblyProgram assembly_program; | 74 | OGLAssemblyProgram assembly_program; |
| 75 | VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{}; | ||
| 75 | 76 | ||
| 76 | u32 num_texture_buffers{}; | 77 | u32 num_texture_buffers{}; |
| 77 | u32 num_image_buffers{}; | 78 | u32 num_image_buffers{}; |
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 976897067..a5d65fdca 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | |||
| @@ -60,6 +60,14 @@ std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) { | |||
| 60 | UNIMPLEMENTED_MSG("index={}", index); | 60 | UNIMPLEMENTED_MSG("index={}", index); |
| 61 | return {GL_POSITION, 0}; | 61 | return {GL_POSITION, 0}; |
| 62 | } | 62 | } |
| 63 | |||
| 64 | struct Spec { | ||
| 65 | static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true}; | ||
| 66 | static constexpr bool has_storage_buffers = true; | ||
| 67 | static constexpr bool has_texture_buffers = true; | ||
| 68 | static constexpr bool has_image_buffers = true; | ||
| 69 | static constexpr bool has_images = true; | ||
| 70 | }; | ||
| 63 | } // Anonymous namespace | 71 | } // Anonymous namespace |
| 64 | 72 | ||
| 65 | size_t GraphicsPipelineKey::Hash() const noexcept { | 73 | size_t GraphicsPipelineKey::Hash() const noexcept { |
| @@ -100,7 +108,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c | |||
| 100 | base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); | 108 | base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); |
| 101 | base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); | 109 | base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); |
| 102 | } | 110 | } |
| 103 | enabled_uniform_buffers[stage] = info.constant_buffer_mask; | 111 | enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; |
| 112 | std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); | ||
| 104 | 113 | ||
| 105 | const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; | 114 | const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; |
| 106 | num_texture_buffers[stage] += num_tex_buffer_bindings; | 115 | num_texture_buffers[stage] += num_tex_buffer_bindings; |
| @@ -130,14 +139,6 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c | |||
| 130 | } | 139 | } |
| 131 | } | 140 | } |
| 132 | 141 | ||
| 133 | struct Spec { | ||
| 134 | static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true}; | ||
| 135 | static constexpr bool has_storage_buffers = true; | ||
| 136 | static constexpr bool has_texture_buffers = true; | ||
| 137 | static constexpr bool has_image_buffers = true; | ||
| 138 | static constexpr bool has_images = true; | ||
| 139 | }; | ||
| 140 | |||
| 141 | void GraphicsPipeline::Configure(bool is_indexed) { | 142 | void GraphicsPipeline::Configure(bool is_indexed) { |
| 142 | std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids; | 143 | std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids; |
| 143 | std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices; | 144 | std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices; |
| @@ -147,7 +148,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { | |||
| 147 | 148 | ||
| 148 | texture_cache.SynchronizeGraphicsDescriptors(); | 149 | texture_cache.SynchronizeGraphicsDescriptors(); |
| 149 | 150 | ||
| 150 | buffer_cache.SetEnabledUniformBuffers(enabled_uniform_buffers); | 151 | buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); |
| 151 | buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); | 152 | buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); |
| 152 | buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); | 153 | buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); |
| 153 | buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); | 154 | buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); |
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index bf33ce604..508fad5bb 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h | |||
| @@ -99,7 +99,8 @@ private: | |||
| 99 | u32 enabled_stages_mask{}; | 99 | u32 enabled_stages_mask{}; |
| 100 | 100 | ||
| 101 | std::array<Shader::Info, 5> stage_infos{}; | 101 | std::array<Shader::Info, 5> stage_infos{}; |
| 102 | std::array<u32, 5> enabled_uniform_buffers{}; | 102 | std::array<u32, 5> enabled_uniform_buffer_masks{}; |
| 103 | VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; | ||
| 103 | std::array<u32, 5> base_uniform_bindings{}; | 104 | std::array<u32, 5> base_uniform_bindings{}; |
| 104 | std::array<u32, 5> base_storage_bindings{}; | 105 | std::array<u32, 5> base_storage_bindings{}; |
| 105 | std::array<u32, 5> num_texture_buffers{}; | 106 | std::array<u32, 5> num_texture_buffers{}; |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 168ffa7e9..ca59042ff 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | ||
| 5 | #include <vector> | 6 | #include <vector> |
| 6 | 7 | ||
| 7 | #include <boost/container/small_vector.hpp> | 8 | #include <boost/container/small_vector.hpp> |
| @@ -27,6 +28,9 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript | |||
| 27 | vk::ShaderModule spv_module_) | 28 | vk::ShaderModule spv_module_) |
| 28 | : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, | 29 | : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, |
| 29 | spv_module(std::move(spv_module_)) { | 30 | spv_module(std::move(spv_module_)) { |
| 31 | std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), | ||
| 32 | uniform_buffer_sizes.begin()); | ||
| 33 | |||
| 30 | auto func{[this, &descriptor_pool] { | 34 | auto func{[this, &descriptor_pool] { |
| 31 | DescriptorLayoutBuilder builder{device.GetLogical()}; | 35 | DescriptorLayoutBuilder builder{device.GetLogical()}; |
| 32 | builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); | 36 | builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); |
| @@ -75,7 +79,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, | |||
| 75 | BufferCache& buffer_cache, TextureCache& texture_cache) { | 79 | BufferCache& buffer_cache, TextureCache& texture_cache) { |
| 76 | update_descriptor_queue.Acquire(); | 80 | update_descriptor_queue.Acquire(); |
| 77 | 81 | ||
| 78 | buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); | 82 | buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes); |
| 79 | buffer_cache.UnbindComputeStorageBuffers(); | 83 | buffer_cache.UnbindComputeStorageBuffers(); |
| 80 | size_t ssbo_index{}; | 84 | size_t ssbo_index{}; |
| 81 | for (const auto& desc : info.storage_buffers_descriptors) { | 85 | for (const auto& desc : info.storage_buffers_descriptors) { |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index a560e382e..a6043866d 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h | |||
| @@ -44,6 +44,8 @@ private: | |||
| 44 | VKUpdateDescriptorQueue& update_descriptor_queue; | 44 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 45 | Shader::Info info; | 45 | Shader::Info info; |
| 46 | 46 | ||
| 47 | VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{}; | ||
| 48 | |||
| 47 | vk::ShaderModule spv_module; | 49 | vk::ShaderModule spv_module; |
| 48 | vk::DescriptorSetLayout descriptor_set_layout; | 50 | vk::DescriptorSetLayout descriptor_set_layout; |
| 49 | DescriptorAllocator descriptor_allocator; | 51 | DescriptorAllocator descriptor_allocator; |
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d381109d6..627ca0158 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | |||
| @@ -218,10 +218,14 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, | |||
| 218 | update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { | 218 | update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { |
| 219 | std::ranges::transform(infos, stage_infos.begin(), | 219 | std::ranges::transform(infos, stage_infos.begin(), |
| 220 | [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); | 220 | [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); |
| 221 | std::ranges::transform(infos, enabled_uniform_buffers.begin(), [](const Shader::Info* info) { | 221 | for (size_t stage = 0; stage < NUM_STAGES; ++stage) { |
| 222 | return info ? info->constant_buffer_mask : 0; | 222 | const Shader::Info* const info{infos[stage]}; |
| 223 | }); | 223 | if (!info) { |
| 224 | 224 | continue; | |
| 225 | } | ||
| 226 | enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; | ||
| 227 | std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); | ||
| 228 | } | ||
| 225 | auto func{[this, &render_pass_cache, &descriptor_pool] { | 229 | auto func{[this, &render_pass_cache, &descriptor_pool] { |
| 226 | DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; | 230 | DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; |
| 227 | descriptor_set_layout = builder.CreateDescriptorSetLayout(); | 231 | descriptor_set_layout = builder.CreateDescriptorSetLayout(); |
| @@ -262,7 +266,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
| 262 | 266 | ||
| 263 | texture_cache.SynchronizeGraphicsDescriptors(); | 267 | texture_cache.SynchronizeGraphicsDescriptors(); |
| 264 | 268 | ||
| 265 | buffer_cache.SetEnabledUniformBuffers(enabled_uniform_buffers); | 269 | buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); |
| 266 | 270 | ||
| 267 | const auto& regs{maxwell3d.regs}; | 271 | const auto& regs{maxwell3d.regs}; |
| 268 | const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; | 272 | const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; |
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 4068a0edc..8c81c28a8 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h | |||
| @@ -130,7 +130,8 @@ private: | |||
| 130 | std::array<vk::ShaderModule, NUM_STAGES> spv_modules; | 130 | std::array<vk::ShaderModule, NUM_STAGES> spv_modules; |
| 131 | 131 | ||
| 132 | std::array<Shader::Info, NUM_STAGES> stage_infos; | 132 | std::array<Shader::Info, NUM_STAGES> stage_infos; |
| 133 | std::array<u32, 5> enabled_uniform_buffers{}; | 133 | std::array<u32, 5> enabled_uniform_buffer_masks{}; |
| 134 | VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; | ||
| 134 | 135 | ||
| 135 | vk::DescriptorSetLayout descriptor_set_layout; | 136 | vk::DescriptorSetLayout descriptor_set_layout; |
| 136 | DescriptorAllocator descriptor_allocator; | 137 | DescriptorAllocator descriptor_allocator; |