diff options
| author | 2023-10-28 18:44:27 -0400 | |
|---|---|---|
| committer | 2023-10-31 20:10:54 -0400 | |
| commit | 7d348005317c1d5f88b2472de64a083defa2feab (patch) | |
| tree | 6f5b26390e2d9794c9031b4887d9e96cdb0a0e8b /src | |
| parent | Merge pull request #11931 from t895/applet-launcher (diff) | |
| download | yuzu-7d348005317c1d5f88b2472de64a083defa2feab.tar.gz yuzu-7d348005317c1d5f88b2472de64a083defa2feab.tar.xz yuzu-7d348005317c1d5f88b2472de64a083defa2feab.zip | |
shader_recompiler: Align SSBO offsets to meet host requirements
Co-Authored-By: Billy Laws <blaws05@gmail.com>
Diffstat (limited to 'src')
10 files changed, 32 insertions, 9 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 928b35561..47183cae1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp | |||
| @@ -298,7 +298,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | |||
| 298 | 298 | ||
| 299 | Optimization::PositionPass(env, program); | 299 | Optimization::PositionPass(env, program); |
| 300 | 300 | ||
| 301 | Optimization::GlobalMemoryToStorageBufferPass(program); | 301 | Optimization::GlobalMemoryToStorageBufferPass(program, host_info); |
| 302 | Optimization::TexturePass(env, program, host_info); | 302 | Optimization::TexturePass(env, program, host_info); |
| 303 | 303 | ||
| 304 | if (Settings::values.resolution_info.active) { | 304 | if (Settings::values.resolution_info.active) { |
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h index 7d2ded907..1b53404fc 100644 --- a/src/shader_recompiler/host_translate_info.h +++ b/src/shader_recompiler/host_translate_info.h | |||
| @@ -16,6 +16,7 @@ struct HostTranslateInfo { | |||
| 16 | bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered | 16 | bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered |
| 17 | bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers | 17 | bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers |
| 18 | bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS | 18 | bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS |
| 19 | u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs | ||
| 19 | bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry | 20 | bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry |
| 20 | ///< passthrough shaders | 21 | ///< passthrough shaders |
| 21 | bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional | 22 | bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional |
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index d1e59f22e..0cea79945 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "shader_recompiler/frontend/ir/breadth_first_search.h" | 11 | #include "shader_recompiler/frontend/ir/breadth_first_search.h" |
| 12 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | 12 | #include "shader_recompiler/frontend/ir/ir_emitter.h" |
| 13 | #include "shader_recompiler/frontend/ir/value.h" | 13 | #include "shader_recompiler/frontend/ir/value.h" |
| 14 | #include "shader_recompiler/host_translate_info.h" | ||
| 14 | #include "shader_recompiler/ir_opt/passes.h" | 15 | #include "shader_recompiler/ir_opt/passes.h" |
| 15 | 16 | ||
| 16 | namespace Shader::Optimization { | 17 | namespace Shader::Optimization { |
| @@ -408,7 +409,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) | |||
| 408 | } | 409 | } |
| 409 | 410 | ||
| 410 | /// Returns the offset in indices (not bytes) for an equivalent storage instruction | 411 | /// Returns the offset in indices (not bytes) for an equivalent storage instruction |
| 411 | IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) { | 412 | IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) { |
| 412 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | 413 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; |
| 413 | IR::U32 offset; | 414 | IR::U32 offset; |
| 414 | if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) { | 415 | if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) { |
| @@ -421,7 +422,10 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer | |||
| 421 | } | 422 | } |
| 422 | // Subtract the least significant 32 bits from the guest offset. The result is the storage | 423 | // Subtract the least significant 32 bits from the guest offset. The result is the storage |
| 423 | // buffer offset in bytes. | 424 | // buffer offset in bytes. |
| 424 | const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; | 425 | IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; |
| 426 | |||
| 427 | // Align the offset base to match the host alignment requirements | ||
| 428 | low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U))); | ||
| 425 | return ir.ISub(offset, low_cbuf); | 429 | return ir.ISub(offset, low_cbuf); |
| 426 | } | 430 | } |
| 427 | 431 | ||
| @@ -516,7 +520,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, | |||
| 516 | } | 520 | } |
| 517 | } // Anonymous namespace | 521 | } // Anonymous namespace |
| 518 | 522 | ||
| 519 | void GlobalMemoryToStorageBufferPass(IR::Program& program) { | 523 | void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) { |
| 520 | StorageInfo info; | 524 | StorageInfo info; |
| 521 | for (IR::Block* const block : program.post_order_blocks) { | 525 | for (IR::Block* const block : program.post_order_blocks) { |
| 522 | for (IR::Inst& inst : block->Instructions()) { | 526 | for (IR::Inst& inst : block->Instructions()) { |
| @@ -540,7 +544,8 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { | |||
| 540 | const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}}; | 544 | const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}}; |
| 541 | IR::Block* const block{storage_inst.block}; | 545 | IR::Block* const block{storage_inst.block}; |
| 542 | IR::Inst* const inst{storage_inst.inst}; | 546 | IR::Inst* const inst{storage_inst.inst}; |
| 543 | const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)}; | 547 | const IR::U32 offset{ |
| 548 | StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)}; | ||
| 544 | Replace(*block, *inst, index, offset); | 549 | Replace(*block, *inst, index, offset); |
| 545 | } | 550 | } |
| 546 | } | 551 | } |
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 629d18fa1..7082fc5f2 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h | |||
| @@ -16,7 +16,7 @@ void CollectShaderInfoPass(Environment& env, IR::Program& program); | |||
| 16 | void ConditionalBarrierPass(IR::Program& program); | 16 | void ConditionalBarrierPass(IR::Program& program); |
| 17 | void ConstantPropagationPass(Environment& env, IR::Program& program); | 17 | void ConstantPropagationPass(Environment& env, IR::Program& program); |
| 18 | void DeadCodeEliminationPass(IR::Program& program); | 18 | void DeadCodeEliminationPass(IR::Program& program); |
| 19 | void GlobalMemoryToStorageBufferPass(IR::Program& program); | 19 | void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info); |
| 20 | void IdentityRemovalPass(IR::Program& program); | 20 | void IdentityRemovalPass(IR::Program& program); |
| 21 | void LowerFp64ToFp32(IR::Program& program); | 21 | void LowerFp64ToFp32(IR::Program& program); |
| 22 | void LowerFp16ToFp32(IR::Program& program); | 22 | void LowerFp16ToFp32(IR::Program& program); |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 081a574e8..9202e53c7 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -1770,6 +1770,7 @@ template <class P> | |||
| 1770 | Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, | 1770 | Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, |
| 1771 | bool is_written) const { | 1771 | bool is_written) const { |
| 1772 | const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr); | 1772 | const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr); |
| 1773 | const u32 alignment = runtime.GetStorageBufferAlignment(); | ||
| 1773 | const auto size = [&]() { | 1774 | const auto size = [&]() { |
| 1774 | const bool is_nvn_cbuf = cbuf_index == 0; | 1775 | const bool is_nvn_cbuf = cbuf_index == 0; |
| 1775 | // The NVN driver buffer (index 0) is known to pack the SSBO address followed by its size. | 1776 | // The NVN driver buffer (index 0) is known to pack the SSBO address followed by its size. |
| @@ -1785,15 +1786,19 @@ Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, | |||
| 1785 | const u32 memory_layout_size = static_cast<u32>(gpu_memory->GetMemoryLayoutSize(gpu_addr)); | 1786 | const u32 memory_layout_size = static_cast<u32>(gpu_memory->GetMemoryLayoutSize(gpu_addr)); |
| 1786 | return std::min(memory_layout_size, static_cast<u32>(8_MiB)); | 1787 | return std::min(memory_layout_size, static_cast<u32>(8_MiB)); |
| 1787 | }(); | 1788 | }(); |
| 1788 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 1789 | const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment); |
| 1790 | const u32 aligned_size = | ||
| 1791 | Common::AlignUp(static_cast<u32>(gpu_addr - aligned_gpu_addr) + size, alignment); | ||
| 1792 | |||
| 1793 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr); | ||
| 1789 | if (!cpu_addr || size == 0) { | 1794 | if (!cpu_addr || size == 0) { |
| 1790 | LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index); | 1795 | LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index); |
| 1791 | return NULL_BINDING; | 1796 | return NULL_BINDING; |
| 1792 | } | 1797 | } |
| 1793 | const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, YUZU_PAGESIZE); | 1798 | const VAddr cpu_end = Common::AlignUp(*cpu_addr + aligned_size, Core::Memory::YUZU_PAGESIZE); |
| 1794 | const Binding binding{ | 1799 | const Binding binding{ |
| 1795 | .cpu_addr = *cpu_addr, | 1800 | .cpu_addr = *cpu_addr, |
| 1796 | .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr), | 1801 | .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *cpu_addr), |
| 1797 | .buffer_id = BufferId{}, | 1802 | .buffer_id = BufferId{}, |
| 1798 | }; | 1803 | }; |
| 1799 | return binding; | 1804 | return binding; |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 41b746f3b..e8dbbd3a2 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -182,6 +182,10 @@ public: | |||
| 182 | return device.CanReportMemoryUsage(); | 182 | return device.CanReportMemoryUsage(); |
| 183 | } | 183 | } |
| 184 | 184 | ||
| 185 | u32 GetStorageBufferAlignment() const { | ||
| 186 | return static_cast<u32>(device.GetShaderStorageBufferAlignment()); | ||
| 187 | } | ||
| 188 | |||
| 185 | private: | 189 | private: |
| 186 | static constexpr std::array PABO_LUT{ | 190 | static constexpr std::array PABO_LUT{ |
| 187 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, | 191 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 2888e0238..69f6759e6 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -240,6 +240,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo | |||
| 240 | .needs_demote_reorder = device.IsAmd(), | 240 | .needs_demote_reorder = device.IsAmd(), |
| 241 | .support_snorm_render_buffer = false, | 241 | .support_snorm_render_buffer = false, |
| 242 | .support_viewport_index_layer = device.HasVertexViewportLayer(), | 242 | .support_viewport_index_layer = device.HasVertexViewportLayer(), |
| 243 | .min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()), | ||
| 243 | .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), | 244 | .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), |
| 244 | .support_conditional_barrier = device.SupportsConditionalBarriers(), | 245 | .support_conditional_barrier = device.SupportsConditionalBarriers(), |
| 245 | } { | 246 | } { |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index d8148e89a..976c3f6a6 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -355,6 +355,10 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const { | |||
| 355 | return device.CanReportMemoryUsage(); | 355 | return device.CanReportMemoryUsage(); |
| 356 | } | 356 | } |
| 357 | 357 | ||
| 358 | u32 BufferCacheRuntime::GetStorageBufferAlignment() const { | ||
| 359 | return static_cast<u32>(device.GetStorageBufferAlignment()); | ||
| 360 | } | ||
| 361 | |||
| 358 | void BufferCacheRuntime::Finish() { | 362 | void BufferCacheRuntime::Finish() { |
| 359 | scheduler.Finish(); | 363 | scheduler.Finish(); |
| 360 | } | 364 | } |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 95446c732..833dfac45 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -75,6 +75,8 @@ public: | |||
| 75 | 75 | ||
| 76 | bool CanReportMemoryUsage() const; | 76 | bool CanReportMemoryUsage() const; |
| 77 | 77 | ||
| 78 | u32 GetStorageBufferAlignment() const; | ||
| 79 | |||
| 78 | [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); | 80 | [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); |
| 79 | 81 | ||
| 80 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false); | 82 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false); |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 22bf8cc77..5e4380175 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -369,6 +369,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device | |||
| 369 | driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE, | 369 | driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE, |
| 370 | .support_snorm_render_buffer = true, | 370 | .support_snorm_render_buffer = true, |
| 371 | .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(), | 371 | .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(), |
| 372 | .min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()), | ||
| 372 | .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), | 373 | .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), |
| 373 | .support_conditional_barrier = device.SupportsConditionalBarriers(), | 374 | .support_conditional_barrier = device.SupportsConditionalBarriers(), |
| 374 | }; | 375 | }; |