diff options
Diffstat (limited to 'src')
10 files changed, 9 insertions, 34 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index a42453e90..17a6d4888 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp | |||
| @@ -292,7 +292,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | |||
| 292 | 292 | ||
| 293 | Optimization::PositionPass(env, program); | 293 | Optimization::PositionPass(env, program); |
| 294 | 294 | ||
| 295 | Optimization::GlobalMemoryToStorageBufferPass(program, host_info); | 295 | Optimization::GlobalMemoryToStorageBufferPass(program); |
| 296 | Optimization::TexturePass(env, program, host_info); | 296 | Optimization::TexturePass(env, program, host_info); |
| 297 | 297 | ||
| 298 | if (Settings::values.resolution_info.active) { | 298 | if (Settings::values.resolution_info.active) { |
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h index 55fc48768..2aaa6c5ea 100644 --- a/src/shader_recompiler/host_translate_info.h +++ b/src/shader_recompiler/host_translate_info.h | |||
| @@ -15,7 +15,6 @@ struct HostTranslateInfo { | |||
| 15 | bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered | 15 | bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered |
| 16 | bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers | 16 | bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers |
| 17 | bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS | 17 | bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS |
| 18 | u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs | ||
| 19 | bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry | 18 | bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry |
| 20 | ///< passthrough shaders | 19 | ///< passthrough shaders |
| 21 | }; | 20 | }; |
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 9101722ba..336338e62 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | |||
| @@ -11,7 +11,6 @@ | |||
| 11 | #include "shader_recompiler/frontend/ir/breadth_first_search.h" | 11 | #include "shader_recompiler/frontend/ir/breadth_first_search.h" |
| 12 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | 12 | #include "shader_recompiler/frontend/ir/ir_emitter.h" |
| 13 | #include "shader_recompiler/frontend/ir/value.h" | 13 | #include "shader_recompiler/frontend/ir/value.h" |
| 14 | #include "shader_recompiler/host_translate_info.h" | ||
| 15 | #include "shader_recompiler/ir_opt/passes.h" | 14 | #include "shader_recompiler/ir_opt/passes.h" |
| 16 | 15 | ||
| 17 | namespace Shader::Optimization { | 16 | namespace Shader::Optimization { |
| @@ -403,7 +402,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) | |||
| 403 | } | 402 | } |
| 404 | 403 | ||
| 405 | /// Returns the offset in indices (not bytes) for an equivalent storage instruction | 404 | /// Returns the offset in indices (not bytes) for an equivalent storage instruction |
| 406 | IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) { | 405 | IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) { |
| 407 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | 406 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; |
| 408 | IR::U32 offset; | 407 | IR::U32 offset; |
| 409 | if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) { | 408 | if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) { |
| @@ -416,10 +415,7 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer | |||
| 416 | } | 415 | } |
| 417 | // Subtract the least significant 32 bits from the guest offset. The result is the storage | 416 | // Subtract the least significant 32 bits from the guest offset. The result is the storage |
| 418 | // buffer offset in bytes. | 417 | // buffer offset in bytes. |
| 419 | IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; | 418 | const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; |
| 420 | |||
| 421 | // Align the offset base to match the host alignment requirements | ||
| 422 | low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U))); | ||
| 423 | return ir.ISub(offset, low_cbuf); | 419 | return ir.ISub(offset, low_cbuf); |
| 424 | } | 420 | } |
| 425 | 421 | ||
| @@ -514,7 +510,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, | |||
| 514 | } | 510 | } |
| 515 | } // Anonymous namespace | 511 | } // Anonymous namespace |
| 516 | 512 | ||
| 517 | void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) { | 513 | void GlobalMemoryToStorageBufferPass(IR::Program& program) { |
| 518 | StorageInfo info; | 514 | StorageInfo info; |
| 519 | for (IR::Block* const block : program.post_order_blocks) { | 515 | for (IR::Block* const block : program.post_order_blocks) { |
| 520 | for (IR::Inst& inst : block->Instructions()) { | 516 | for (IR::Inst& inst : block->Instructions()) { |
| @@ -538,8 +534,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateIn | |||
| 538 | const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}}; | 534 | const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}}; |
| 539 | IR::Block* const block{storage_inst.block}; | 535 | IR::Block* const block{storage_inst.block}; |
| 540 | IR::Inst* const inst{storage_inst.inst}; | 536 | IR::Inst* const inst{storage_inst.inst}; |
| 541 | const IR::U32 offset{ | 537 | const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)}; |
| 542 | StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)}; | ||
| 543 | Replace(*block, *inst, index, offset); | 538 | Replace(*block, *inst, index, offset); |
| 544 | } | 539 | } |
| 545 | } | 540 | } |
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 4ffad1172..1f8f2ba95 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h | |||
| @@ -15,7 +15,7 @@ namespace Shader::Optimization { | |||
| 15 | void CollectShaderInfoPass(Environment& env, IR::Program& program); | 15 | void CollectShaderInfoPass(Environment& env, IR::Program& program); |
| 16 | void ConstantPropagationPass(Environment& env, IR::Program& program); | 16 | void ConstantPropagationPass(Environment& env, IR::Program& program); |
| 17 | void DeadCodeEliminationPass(IR::Program& program); | 17 | void DeadCodeEliminationPass(IR::Program& program); |
| 18 | void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info); | 18 | void GlobalMemoryToStorageBufferPass(IR::Program& program); |
| 19 | void IdentityRemovalPass(IR::Program& program); | 19 | void IdentityRemovalPass(IR::Program& program); |
| 20 | void LowerFp16ToFp32(IR::Program& program); | 20 | void LowerFp16ToFp32(IR::Program& program); |
| 21 | void LowerInt64ToInt32(IR::Program& program); | 21 | void LowerInt64ToInt32(IR::Program& program); |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 627917ab6..06fd40851 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -1938,21 +1938,14 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s | |||
| 1938 | bool is_written) const { | 1938 | bool is_written) const { |
| 1939 | const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr); | 1939 | const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr); |
| 1940 | const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8); | 1940 | const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8); |
| 1941 | const u32 alignment = runtime.GetStorageBufferAlignment(); | 1941 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 1942 | |||
| 1943 | const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment); | ||
| 1944 | const u32 aligned_size = | ||
| 1945 | Common::AlignUp(static_cast<u32>(gpu_addr - aligned_gpu_addr) + size, alignment); | ||
| 1946 | |||
| 1947 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr); | ||
| 1948 | if (!cpu_addr || size == 0) { | 1942 | if (!cpu_addr || size == 0) { |
| 1949 | return NULL_BINDING; | 1943 | return NULL_BINDING; |
| 1950 | } | 1944 | } |
| 1951 | 1945 | const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); | |
| 1952 | const VAddr cpu_end = Common::AlignUp(*cpu_addr + aligned_size, Core::Memory::YUZU_PAGESIZE); | ||
| 1953 | const Binding binding{ | 1946 | const Binding binding{ |
| 1954 | .cpu_addr = *cpu_addr, | 1947 | .cpu_addr = *cpu_addr, |
| 1955 | .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *cpu_addr), | 1948 | .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr), |
| 1956 | .buffer_id = BufferId{}, | 1949 | .buffer_id = BufferId{}, |
| 1957 | }; | 1950 | }; |
| 1958 | return binding; | 1951 | return binding; |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index bb1962073..a8c3f8b67 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -160,10 +160,6 @@ public: | |||
| 160 | return device.CanReportMemoryUsage(); | 160 | return device.CanReportMemoryUsage(); |
| 161 | } | 161 | } |
| 162 | 162 | ||
| 163 | u32 GetStorageBufferAlignment() const { | ||
| 164 | return static_cast<u32>(device.GetShaderStorageBufferAlignment()); | ||
| 165 | } | ||
| 166 | |||
| 167 | private: | 163 | private: |
| 168 | static constexpr std::array PABO_LUT{ | 164 | static constexpr std::array PABO_LUT{ |
| 169 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, | 165 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 626ea7dcb..479bb8ba3 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -236,7 +236,6 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo | |||
| 236 | .needs_demote_reorder = device.IsAmd(), | 236 | .needs_demote_reorder = device.IsAmd(), |
| 237 | .support_snorm_render_buffer = false, | 237 | .support_snorm_render_buffer = false, |
| 238 | .support_viewport_index_layer = device.HasVertexViewportLayer(), | 238 | .support_viewport_index_layer = device.HasVertexViewportLayer(), |
| 239 | .min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()), | ||
| 240 | .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), | 239 | .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), |
| 241 | } { | 240 | } { |
| 242 | if (use_asynchronous_shaders) { | 241 | if (use_asynchronous_shaders) { |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 1cfb4c2ff..b0153a502 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -330,10 +330,6 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const { | |||
| 330 | return device.CanReportMemoryUsage(); | 330 | return device.CanReportMemoryUsage(); |
| 331 | } | 331 | } |
| 332 | 332 | ||
| 333 | u32 BufferCacheRuntime::GetStorageBufferAlignment() const { | ||
| 334 | return static_cast<u32>(device.GetStorageBufferAlignment()); | ||
| 335 | } | ||
| 336 | |||
| 337 | void BufferCacheRuntime::Finish() { | 333 | void BufferCacheRuntime::Finish() { |
| 338 | scheduler.Finish(); | 334 | scheduler.Finish(); |
| 339 | } | 335 | } |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 06539c733..183b33632 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -73,8 +73,6 @@ public: | |||
| 73 | 73 | ||
| 74 | bool CanReportMemoryUsage() const; | 74 | bool CanReportMemoryUsage() const; |
| 75 | 75 | ||
| 76 | u32 GetStorageBufferAlignment() const; | ||
| 77 | |||
| 78 | [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); | 76 | [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); |
| 79 | 77 | ||
| 80 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); | 78 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7e69b11d8..0684cceed 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -344,7 +344,6 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device | |||
| 344 | driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE, | 344 | driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE, |
| 345 | .support_snorm_render_buffer = true, | 345 | .support_snorm_render_buffer = true, |
| 346 | .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(), | 346 | .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(), |
| 347 | .min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()), | ||
| 348 | .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), | 347 | .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), |
| 349 | }; | 348 | }; |
| 350 | 349 | ||