diff options
Diffstat (limited to 'src')
7 files changed, 19 insertions, 6 deletions
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp index 2705ab140..9319ea007 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #include "shader_recompiler/backend/glasm/glasm_emit_context.h" | 5 | #include "shader_recompiler/backend/glasm/glasm_emit_context.h" |
| 6 | #include "shader_recompiler/frontend/ir/program.h" | 6 | #include "shader_recompiler/frontend/ir/program.h" |
| 7 | #include "shader_recompiler/frontend/ir/value.h" | 7 | #include "shader_recompiler/frontend/ir/value.h" |
| 8 | #include "shader_recompiler/profile.h" | ||
| 8 | #include "shader_recompiler/runtime_info.h" | 9 | #include "shader_recompiler/runtime_info.h" |
| 9 | 10 | ||
| 10 | namespace Shader::Backend::GLASM { | 11 | namespace Shader::Backend::GLASM { |
| @@ -35,7 +36,9 @@ void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std | |||
| 35 | continue; | 36 | continue; |
| 36 | } | 37 | } |
| 37 | const auto& ssbo{ctx.info.storage_buffers_descriptors[index]}; | 38 | const auto& ssbo{ctx.info.storage_buffers_descriptors[index]}; |
| 38 | ctx.Add("LDC.U64 DC.x,c{}[{}];" // ssbo_addr | 39 | const u64 ssbo_align_mask{~(ctx.profile.min_ssbo_alignment - 1U)}; |
| 40 | ctx.Add("LDC.U64 DC.x,c{}[{}];" // unaligned_ssbo_addr | ||
| 41 | "AND.U64 DC.x,DC.x,{};" // ssbo_addr = unaligned_ssbo_addr & ssbo_align_mask | ||
| 39 | "LDC.U32 RC.x,c{}[{}];" // ssbo_size_u32 | 42 | "LDC.U32 RC.x,c{}[{}];" // ssbo_size_u32 |
| 40 | "CVT.U64.U32 DC.y,RC.x;" // ssbo_size = ssbo_size_u32 | 43 | "CVT.U64.U32 DC.y,RC.x;" // ssbo_size = ssbo_size_u32 |
| 41 | "ADD.U64 DC.y,DC.y,DC.x;" // ssbo_end = ssbo_addr + ssbo_size | 44 | "ADD.U64 DC.y,DC.y,DC.x;" // ssbo_end = ssbo_addr + ssbo_size |
| @@ -44,8 +47,8 @@ void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std | |||
| 44 | "AND.U.CC RC.x,RC.x,RC.y;" // cond = a && b | 47 | "AND.U.CC RC.x,RC.x,RC.y;" // cond = a && b |
| 45 | "IF NE.x;" // if cond | 48 | "IF NE.x;" // if cond |
| 46 | "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr | 49 | "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr |
| 47 | ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address, | 50 | ssbo.cbuf_index, ssbo.cbuf_offset, ssbo_align_mask, ssbo.cbuf_index, |
| 48 | address, address); | 51 | ssbo.cbuf_offset + 8, address, address, address); |
| 49 | if (pointer_based) { | 52 | if (pointer_based) { |
| 50 | ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf | 53 | ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf |
| 51 | "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset | 54 | "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset |
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp index 9ff4028c2..fd9a99449 100644 --- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp | |||
| @@ -601,7 +601,10 @@ std::string EmitContext::DefineGlobalMemoryFunctions() { | |||
| 601 | addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc)); | 601 | addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc)); |
| 602 | size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc)); | 602 | size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc)); |
| 603 | } | 603 | } |
| 604 | const auto addr_pack{fmt::format("packUint2x32(uvec2({},{}))", addr_xy[0], addr_xy[1])}; | 604 | const u32 ssbo_align_mask{~(static_cast<u32>(profile.min_ssbo_alignment) - 1U)}; |
| 605 | const auto aligned_low_addr{fmt::format("{}&{}", addr_xy[0], ssbo_align_mask)}; | ||
| 606 | const auto aligned_addr{fmt::format("uvec2({},{})", aligned_low_addr, addr_xy[1])}; | ||
| 607 | const auto addr_pack{fmt::format("packUint2x32({})", aligned_addr)}; | ||
| 605 | const auto addr_statment{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)}; | 608 | const auto addr_statment{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)}; |
| 606 | func += addr_statment; | 609 | func += addr_statment; |
| 607 | 610 | ||
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 57df6fc34..3350f1f85 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp | |||
| @@ -891,7 +891,9 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { | |||
| 891 | const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32, | 891 | const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32, |
| 892 | zero, ssbo_size_cbuf_offset)}; | 892 | zero, ssbo_size_cbuf_offset)}; |
| 893 | 893 | ||
| 894 | const Id ssbo_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))}; | 894 | const u64 ssbo_align_mask{~(profile.min_ssbo_alignment - 1U)}; |
| 895 | const Id unaligned_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))}; | ||
| 896 | const Id ssbo_addr{OpBitwiseAnd(U64, unaligned_addr, Constant(U64, ssbo_align_mask))}; | ||
| 895 | const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))}; | 897 | const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))}; |
| 896 | const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)}; | 898 | const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)}; |
| 897 | const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr), | 899 | const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr), |
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 38d820db2..e62ba8a20 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h | |||
| @@ -84,6 +84,8 @@ struct Profile { | |||
| 84 | 84 | ||
| 85 | /// Maxwell and earlier nVidia architectures have broken robust support | 85 | /// Maxwell and earlier nVidia architectures have broken robust support |
| 86 | bool has_broken_robust{}; | 86 | bool has_broken_robust{}; |
| 87 | |||
| 88 | u64 min_ssbo_alignment{}; | ||
| 87 | }; | 89 | }; |
| 88 | 90 | ||
| 89 | } // namespace Shader | 91 | } // namespace Shader |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 5574c6130..2648970b6 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -1796,7 +1796,8 @@ Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, | |||
| 1796 | return NULL_BINDING; | 1796 | return NULL_BINDING; |
| 1797 | } | 1797 | } |
| 1798 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 1798 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 1799 | ASSERT_MSG(cpu_addr, "Unaligned storage buffer address not found for cbuf index {}", cbuf_index); | 1799 | ASSERT_MSG(cpu_addr, "Unaligned storage buffer address not found for cbuf index {}", |
| 1800 | cbuf_index); | ||
| 1800 | // The end address used for size calculation does not need to be aligned | 1801 | // The end address used for size calculation does not need to be aligned |
| 1801 | const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); | 1802 | const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); |
| 1802 | 1803 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 69f6759e6..26f2d0ea7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -232,6 +232,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo | |||
| 232 | .has_gl_bool_ref_bug = device.HasBoolRefBug(), | 232 | .has_gl_bool_ref_bug = device.HasBoolRefBug(), |
| 233 | .ignore_nan_fp_comparisons = true, | 233 | .ignore_nan_fp_comparisons = true, |
| 234 | .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), | 234 | .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), |
| 235 | .min_ssbo_alignment = device.GetShaderStorageBufferAlignment(), | ||
| 235 | }, | 236 | }, |
| 236 | host_info{ | 237 | host_info{ |
| 237 | .support_float64 = true, | 238 | .support_float64 = true, |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 5e4380175..0d604eee3 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -359,6 +359,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device | |||
| 359 | driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY, | 359 | driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY, |
| 360 | .has_broken_robust = | 360 | .has_broken_robust = |
| 361 | device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Pascal, | 361 | device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Pascal, |
| 362 | .min_ssbo_alignment = device.GetStorageBufferAlignment(), | ||
| 362 | }; | 363 | }; |
| 363 | 364 | ||
| 364 | host_info = Shader::HostTranslateInfo{ | 365 | host_info = Shader::HostTranslateInfo{ |