summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Ameer J, 2023-10-28 18:44:27 -0400
committer: Ameer J, 2023-10-31 20:10:54 -0400
commit: 7d348005317c1d5f88b2472de64a083defa2feab (patch)
tree: 6f5b26390e2d9794c9031b4887d9e96cdb0a0e8b /src
parent: Merge pull request #11931 from t895/applet-launcher (diff)
download: yuzu-7d348005317c1d5f88b2472de64a083defa2feab.tar.gz
          yuzu-7d348005317c1d5f88b2472de64a083defa2feab.tar.xz
          yuzu-7d348005317c1d5f88b2472de64a083defa2feab.zip
shader_recompiler: Align SSBO offsets to meet host requirements
Co-Authored-By: Billy Laws <blaws05@gmail.com>
Diffstat (limited to 'src')
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate_program.cpp            |  2
-rw-r--r--  src/shader_recompiler/host_translate_info.h                             |  1
-rw-r--r--  src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp   | 13
-rw-r--r--  src/shader_recompiler/ir_opt/passes.h                                   |  2
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h                              | 11
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.h                        |  4
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp                      |  1
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp                      |  4
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.h                        |  2
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp                    |  1
10 files changed, 32 insertions, 9 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index 928b35561..47183cae1 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -298,7 +298,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
298 298
299 Optimization::PositionPass(env, program); 299 Optimization::PositionPass(env, program);
300 300
301 Optimization::GlobalMemoryToStorageBufferPass(program); 301 Optimization::GlobalMemoryToStorageBufferPass(program, host_info);
302 Optimization::TexturePass(env, program, host_info); 302 Optimization::TexturePass(env, program, host_info);
303 303
304 if (Settings::values.resolution_info.active) { 304 if (Settings::values.resolution_info.active) {
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index 7d2ded907..1b53404fc 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -16,6 +16,7 @@ struct HostTranslateInfo {
16 bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered 16 bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
17 bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers 17 bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
18 bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS 18 bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
19 u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
19 bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry 20 bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
20 ///< passthrough shaders 21 ///< passthrough shaders
21 bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional 22 bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index d1e59f22e..0cea79945 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -11,6 +11,7 @@
11#include "shader_recompiler/frontend/ir/breadth_first_search.h" 11#include "shader_recompiler/frontend/ir/breadth_first_search.h"
12#include "shader_recompiler/frontend/ir/ir_emitter.h" 12#include "shader_recompiler/frontend/ir/ir_emitter.h"
13#include "shader_recompiler/frontend/ir/value.h" 13#include "shader_recompiler/frontend/ir/value.h"
14#include "shader_recompiler/host_translate_info.h"
14#include "shader_recompiler/ir_opt/passes.h" 15#include "shader_recompiler/ir_opt/passes.h"
15 16
16namespace Shader::Optimization { 17namespace Shader::Optimization {
@@ -408,7 +409,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
408} 409}
409 410
410/// Returns the offset in indices (not bytes) for an equivalent storage instruction 411/// Returns the offset in indices (not bytes) for an equivalent storage instruction
411IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) { 412IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) {
412 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; 413 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
413 IR::U32 offset; 414 IR::U32 offset;
414 if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) { 415 if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
@@ -421,7 +422,10 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
421 } 422 }
422 // Subtract the least significant 32 bits from the guest offset. The result is the storage 423 // Subtract the least significant 32 bits from the guest offset. The result is the storage
423 // buffer offset in bytes. 424 // buffer offset in bytes.
424 const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; 425 IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
426
427 // Align the offset base to match the host alignment requirements
428 low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
425 return ir.ISub(offset, low_cbuf); 429 return ir.ISub(offset, low_cbuf);
426} 430}
427 431
@@ -516,7 +520,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
516} 520}
517} // Anonymous namespace 521} // Anonymous namespace
518 522
519void GlobalMemoryToStorageBufferPass(IR::Program& program) { 523void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) {
520 StorageInfo info; 524 StorageInfo info;
521 for (IR::Block* const block : program.post_order_blocks) { 525 for (IR::Block* const block : program.post_order_blocks) {
522 for (IR::Inst& inst : block->Instructions()) { 526 for (IR::Inst& inst : block->Instructions()) {
@@ -540,7 +544,8 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
540 const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}}; 544 const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
541 IR::Block* const block{storage_inst.block}; 545 IR::Block* const block{storage_inst.block};
542 IR::Inst* const inst{storage_inst.inst}; 546 IR::Inst* const inst{storage_inst.inst};
543 const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)}; 547 const IR::U32 offset{
548 StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
544 Replace(*block, *inst, index, offset); 549 Replace(*block, *inst, index, offset);
545 } 550 }
546} 551}
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 629d18fa1..7082fc5f2 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -16,7 +16,7 @@ void CollectShaderInfoPass(Environment& env, IR::Program& program);
16void ConditionalBarrierPass(IR::Program& program); 16void ConditionalBarrierPass(IR::Program& program);
17void ConstantPropagationPass(Environment& env, IR::Program& program); 17void ConstantPropagationPass(Environment& env, IR::Program& program);
18void DeadCodeEliminationPass(IR::Program& program); 18void DeadCodeEliminationPass(IR::Program& program);
19void GlobalMemoryToStorageBufferPass(IR::Program& program); 19void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info);
20void IdentityRemovalPass(IR::Program& program); 20void IdentityRemovalPass(IR::Program& program);
21void LowerFp64ToFp32(IR::Program& program); 21void LowerFp64ToFp32(IR::Program& program);
22void LowerFp16ToFp32(IR::Program& program); 22void LowerFp16ToFp32(IR::Program& program);
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 081a574e8..9202e53c7 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -1770,6 +1770,7 @@ template <class P>
1770Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, 1770Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
1771 bool is_written) const { 1771 bool is_written) const {
1772 const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr); 1772 const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
1773 const u32 alignment = runtime.GetStorageBufferAlignment();
1773 const auto size = [&]() { 1774 const auto size = [&]() {
1774 const bool is_nvn_cbuf = cbuf_index == 0; 1775 const bool is_nvn_cbuf = cbuf_index == 0;
1775 // The NVN driver buffer (index 0) is known to pack the SSBO address followed by its size. 1776 // The NVN driver buffer (index 0) is known to pack the SSBO address followed by its size.
@@ -1785,15 +1786,19 @@ Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
1785 const u32 memory_layout_size = static_cast<u32>(gpu_memory->GetMemoryLayoutSize(gpu_addr)); 1786 const u32 memory_layout_size = static_cast<u32>(gpu_memory->GetMemoryLayoutSize(gpu_addr));
1786 return std::min(memory_layout_size, static_cast<u32>(8_MiB)); 1787 return std::min(memory_layout_size, static_cast<u32>(8_MiB));
1787 }(); 1788 }();
1788 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1789 const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment);
1790 const u32 aligned_size =
1791 Common::AlignUp(static_cast<u32>(gpu_addr - aligned_gpu_addr) + size, alignment);
1792
1793 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr);
1789 if (!cpu_addr || size == 0) { 1794 if (!cpu_addr || size == 0) {
1790 LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index); 1795 LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index);
1791 return NULL_BINDING; 1796 return NULL_BINDING;
1792 } 1797 }
1793 const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, YUZU_PAGESIZE); 1798 const VAddr cpu_end = Common::AlignUp(*cpu_addr + aligned_size, Core::Memory::YUZU_PAGESIZE);
1794 const Binding binding{ 1799 const Binding binding{
1795 .cpu_addr = *cpu_addr, 1800 .cpu_addr = *cpu_addr,
1796 .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr), 1801 .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *cpu_addr),
1797 .buffer_id = BufferId{}, 1802 .buffer_id = BufferId{},
1798 }; 1803 };
1799 return binding; 1804 return binding;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 41b746f3b..e8dbbd3a2 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -182,6 +182,10 @@ public:
182 return device.CanReportMemoryUsage(); 182 return device.CanReportMemoryUsage();
183 } 183 }
184 184
185 u32 GetStorageBufferAlignment() const {
186 return static_cast<u32>(device.GetShaderStorageBufferAlignment());
187 }
188
185private: 189private:
186 static constexpr std::array PABO_LUT{ 190 static constexpr std::array PABO_LUT{
187 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, 191 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 2888e0238..69f6759e6 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -240,6 +240,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
240 .needs_demote_reorder = device.IsAmd(), 240 .needs_demote_reorder = device.IsAmd(),
241 .support_snorm_render_buffer = false, 241 .support_snorm_render_buffer = false,
242 .support_viewport_index_layer = device.HasVertexViewportLayer(), 242 .support_viewport_index_layer = device.HasVertexViewportLayer(),
243 .min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
243 .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), 244 .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
244 .support_conditional_barrier = device.SupportsConditionalBarriers(), 245 .support_conditional_barrier = device.SupportsConditionalBarriers(),
245 } { 246 } {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index d8148e89a..976c3f6a6 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -355,6 +355,10 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const {
355 return device.CanReportMemoryUsage(); 355 return device.CanReportMemoryUsage();
356} 356}
357 357
358u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
359 return static_cast<u32>(device.GetStorageBufferAlignment());
360}
361
358void BufferCacheRuntime::Finish() { 362void BufferCacheRuntime::Finish() {
359 scheduler.Finish(); 363 scheduler.Finish();
360} 364}
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 95446c732..833dfac45 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -75,6 +75,8 @@ public:
75 75
76 bool CanReportMemoryUsage() const; 76 bool CanReportMemoryUsage() const;
77 77
78 u32 GetStorageBufferAlignment() const;
79
78 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); 80 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
79 81
80 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false); 82 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false);
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 22bf8cc77..5e4380175 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -369,6 +369,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
369 driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE, 369 driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE,
370 .support_snorm_render_buffer = true, 370 .support_snorm_render_buffer = true,
371 .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(), 371 .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),
372 .min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()),
372 .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), 373 .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
373 .support_conditional_barrier = device.SupportsConditionalBarriers(), 374 .support_conditional_barrier = device.SupportsConditionalBarriers(),
374 }; 375 };