summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate_program.cpp          |  2
-rw-r--r--  src/shader_recompiler/host_translate_info.h                           |  1
-rw-r--r--  src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | 13
-rw-r--r--  src/shader_recompiler/ir_opt/passes.h                                 |  2
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h                            | 13
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.h                      |  4
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp                    |  1
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp                    |  4
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.h                      |  2
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp                  |  1
10 files changed, 9 insertions, 34 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index a42453e90..17a6d4888 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -292,7 +292,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
292 292
293 Optimization::PositionPass(env, program); 293 Optimization::PositionPass(env, program);
294 294
295 Optimization::GlobalMemoryToStorageBufferPass(program, host_info); 295 Optimization::GlobalMemoryToStorageBufferPass(program);
296 Optimization::TexturePass(env, program, host_info); 296 Optimization::TexturePass(env, program, host_info);
297 297
298 if (Settings::values.resolution_info.active) { 298 if (Settings::values.resolution_info.active) {
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index 55fc48768..2aaa6c5ea 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -15,7 +15,6 @@ struct HostTranslateInfo {
15 bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered 15 bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
16 bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers 16 bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
17 bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS 17 bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
18 u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
19 bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry 18 bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
20 ///< passthrough shaders 19 ///< passthrough shaders
21}; 20};
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 9101722ba..336338e62 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -11,7 +11,6 @@
11#include "shader_recompiler/frontend/ir/breadth_first_search.h" 11#include "shader_recompiler/frontend/ir/breadth_first_search.h"
12#include "shader_recompiler/frontend/ir/ir_emitter.h" 12#include "shader_recompiler/frontend/ir/ir_emitter.h"
13#include "shader_recompiler/frontend/ir/value.h" 13#include "shader_recompiler/frontend/ir/value.h"
14#include "shader_recompiler/host_translate_info.h"
15#include "shader_recompiler/ir_opt/passes.h" 14#include "shader_recompiler/ir_opt/passes.h"
16 15
17namespace Shader::Optimization { 16namespace Shader::Optimization {
@@ -403,7 +402,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
403} 402}
404 403
405/// Returns the offset in indices (not bytes) for an equivalent storage instruction 404/// Returns the offset in indices (not bytes) for an equivalent storage instruction
406IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) { 405IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) {
407 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; 406 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
408 IR::U32 offset; 407 IR::U32 offset;
409 if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) { 408 if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
@@ -416,10 +415,7 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
416 } 415 }
417 // Subtract the least significant 32 bits from the guest offset. The result is the storage 416 // Subtract the least significant 32 bits from the guest offset. The result is the storage
418 // buffer offset in bytes. 417 // buffer offset in bytes.
419 IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; 418 const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
420
421 // Align the offset base to match the host alignment requirements
422 low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
423 return ir.ISub(offset, low_cbuf); 419 return ir.ISub(offset, low_cbuf);
424} 420}
425 421
@@ -514,7 +510,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
514} 510}
515} // Anonymous namespace 511} // Anonymous namespace
516 512
517void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) { 513void GlobalMemoryToStorageBufferPass(IR::Program& program) {
518 StorageInfo info; 514 StorageInfo info;
519 for (IR::Block* const block : program.post_order_blocks) { 515 for (IR::Block* const block : program.post_order_blocks) {
520 for (IR::Inst& inst : block->Instructions()) { 516 for (IR::Inst& inst : block->Instructions()) {
@@ -538,8 +534,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateIn
538 const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}}; 534 const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
539 IR::Block* const block{storage_inst.block}; 535 IR::Block* const block{storage_inst.block};
540 IR::Inst* const inst{storage_inst.inst}; 536 IR::Inst* const inst{storage_inst.inst};
541 const IR::U32 offset{ 537 const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
542 StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
543 Replace(*block, *inst, index, offset); 538 Replace(*block, *inst, index, offset);
544 } 539 }
545} 540}
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 4ffad1172..1f8f2ba95 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -15,7 +15,7 @@ namespace Shader::Optimization {
15void CollectShaderInfoPass(Environment& env, IR::Program& program); 15void CollectShaderInfoPass(Environment& env, IR::Program& program);
16void ConstantPropagationPass(Environment& env, IR::Program& program); 16void ConstantPropagationPass(Environment& env, IR::Program& program);
17void DeadCodeEliminationPass(IR::Program& program); 17void DeadCodeEliminationPass(IR::Program& program);
18void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info); 18void GlobalMemoryToStorageBufferPass(IR::Program& program);
19void IdentityRemovalPass(IR::Program& program); 19void IdentityRemovalPass(IR::Program& program);
20void LowerFp16ToFp32(IR::Program& program); 20void LowerFp16ToFp32(IR::Program& program);
21void LowerInt64ToInt32(IR::Program& program); 21void LowerInt64ToInt32(IR::Program& program);
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 627917ab6..06fd40851 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -1938,21 +1938,14 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
1938 bool is_written) const { 1938 bool is_written) const {
1939 const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr); 1939 const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
1940 const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8); 1940 const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
1941 const u32 alignment = runtime.GetStorageBufferAlignment(); 1941 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1942
1943 const GPUVAddr aligned_gpu_addr = Common::AlignDown(gpu_addr, alignment);
1944 const u32 aligned_size =
1945 Common::AlignUp(static_cast<u32>(gpu_addr - aligned_gpu_addr) + size, alignment);
1946
1947 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(aligned_gpu_addr);
1948 if (!cpu_addr || size == 0) { 1942 if (!cpu_addr || size == 0) {
1949 return NULL_BINDING; 1943 return NULL_BINDING;
1950 } 1944 }
1951 1945 const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE);
1952 const VAddr cpu_end = Common::AlignUp(*cpu_addr + aligned_size, Core::Memory::YUZU_PAGESIZE);
1953 const Binding binding{ 1946 const Binding binding{
1954 .cpu_addr = *cpu_addr, 1947 .cpu_addr = *cpu_addr,
1955 .size = is_written ? aligned_size : static_cast<u32>(cpu_end - *cpu_addr), 1948 .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr),
1956 .buffer_id = BufferId{}, 1949 .buffer_id = BufferId{},
1957 }; 1950 };
1958 return binding; 1951 return binding;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index bb1962073..a8c3f8b67 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -160,10 +160,6 @@ public:
160 return device.CanReportMemoryUsage(); 160 return device.CanReportMemoryUsage();
161 } 161 }
162 162
163 u32 GetStorageBufferAlignment() const {
164 return static_cast<u32>(device.GetShaderStorageBufferAlignment());
165 }
166
167private: 163private:
168 static constexpr std::array PABO_LUT{ 164 static constexpr std::array PABO_LUT{
169 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, 165 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 626ea7dcb..479bb8ba3 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -236,7 +236,6 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
236 .needs_demote_reorder = device.IsAmd(), 236 .needs_demote_reorder = device.IsAmd(),
237 .support_snorm_render_buffer = false, 237 .support_snorm_render_buffer = false,
238 .support_viewport_index_layer = device.HasVertexViewportLayer(), 238 .support_viewport_index_layer = device.HasVertexViewportLayer(),
239 .min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
240 .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(), 239 .support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
241 } { 240 } {
242 if (use_asynchronous_shaders) { 241 if (use_asynchronous_shaders) {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 1cfb4c2ff..b0153a502 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -330,10 +330,6 @@ bool BufferCacheRuntime::CanReportMemoryUsage() const {
330 return device.CanReportMemoryUsage(); 330 return device.CanReportMemoryUsage();
331} 331}
332 332
333u32 BufferCacheRuntime::GetStorageBufferAlignment() const {
334 return static_cast<u32>(device.GetStorageBufferAlignment());
335}
336
337void BufferCacheRuntime::Finish() { 333void BufferCacheRuntime::Finish() {
338 scheduler.Finish(); 334 scheduler.Finish();
339} 335}
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 06539c733..183b33632 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -73,8 +73,6 @@ public:
73 73
74 bool CanReportMemoryUsage() const; 74 bool CanReportMemoryUsage() const;
75 75
76 u32 GetStorageBufferAlignment() const;
77
78 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); 76 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
79 77
80 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); 78 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 7e69b11d8..0684cceed 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -344,7 +344,6 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
344 driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE, 344 driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE,
345 .support_snorm_render_buffer = true, 345 .support_snorm_render_buffer = true,
346 .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(), 346 .support_viewport_index_layer = device.IsExtShaderViewportIndexLayerSupported(),
347 .min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()),
348 .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), 347 .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
349 }; 348 };
350 349