diff options
Diffstat (limited to '')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_arb_decompiler.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 11 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_device.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 12 |
6 files changed, 42 insertions, 9 deletions
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp index c06e838f7..3b61c9e21 100644 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp | |||
| @@ -913,11 +913,19 @@ void ARBDecompiler::DeclareCompute() { | |||
| 913 | const ComputeInfo& info = registry.GetComputeInfo(); | 913 | const ComputeInfo& info = registry.GetComputeInfo(); |
| 914 | AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], | 914 | AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], |
| 915 | info.workgroup_size[2]); | 915 | info.workgroup_size[2]); |
| 916 | if (info.shared_memory_size_in_words > 0) { | 916 | if (info.shared_memory_size_in_words == 0) { |
| 917 | const u32 size_in_bytes = info.shared_memory_size_in_words * 4; | 917 | return; |
| 918 | AddLine("SHARED_MEMORY {};", size_in_bytes); | 918 | } |
| 919 | AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); | 919 | const u32 limit = device.GetMaxComputeSharedMemorySize(); |
| 920 | u32 size_in_bytes = info.shared_memory_size_in_words * 4; | ||
| 921 | if (size_in_bytes > limit) { | ||
| 922 | LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", | ||
| 923 | size_in_bytes, limit); | ||
| 924 | size_in_bytes = limit; | ||
| 920 | } | 925 | } |
| 926 | |||
| 927 | AddLine("SHARED_MEMORY {};", size_in_bytes); | ||
| 928 | AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); | ||
| 921 | } | 929 | } |
| 922 | 930 | ||
| 923 | void ARBDecompiler::DeclareInputAttributes() { | 931 | void ARBDecompiler::DeclareInputAttributes() { |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 630acb73b..e7d95149f 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -212,6 +212,7 @@ Device::Device() | |||
| 212 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | 212 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); |
| 213 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); | 213 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); |
| 214 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); | 214 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); |
| 215 | max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); | ||
| 215 | has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && | 216 | has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && |
| 216 | GLAD_GL_NV_shader_thread_shuffle; | 217 | GLAD_GL_NV_shader_thread_shuffle; |
| 217 | has_shader_ballot = GLAD_GL_ARB_shader_ballot; | 218 | has_shader_ballot = GLAD_GL_ARB_shader_ballot; |
| @@ -250,6 +251,7 @@ Device::Device(std::nullptr_t) { | |||
| 250 | shader_storage_alignment = 4; | 251 | shader_storage_alignment = 4; |
| 251 | max_vertex_attributes = 16; | 252 | max_vertex_attributes = 16; |
| 252 | max_varyings = 15; | 253 | max_varyings = 15; |
| 254 | max_compute_shared_memory_size = 0x10000; | ||
| 253 | has_warp_intrinsics = true; | 255 | has_warp_intrinsics = true; |
| 254 | has_shader_ballot = true; | 256 | has_shader_ballot = true; |
| 255 | has_vertex_viewport_layer = true; | 257 | has_vertex_viewport_layer = true; |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 94d38d7d1..8a4b6b9fc 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -52,6 +52,10 @@ public: | |||
| 52 | return max_varyings; | 52 | return max_varyings; |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | u32 GetMaxComputeSharedMemorySize() const { | ||
| 56 | return max_compute_shared_memory_size; | ||
| 57 | } | ||
| 58 | |||
| 55 | bool HasWarpIntrinsics() const { | 59 | bool HasWarpIntrinsics() const { |
| 56 | return has_warp_intrinsics; | 60 | return has_warp_intrinsics; |
| 57 | } | 61 | } |
| @@ -118,6 +122,7 @@ private: | |||
| 118 | std::size_t shader_storage_alignment{}; | 122 | std::size_t shader_storage_alignment{}; |
| 119 | u32 max_vertex_attributes{}; | 123 | u32 max_vertex_attributes{}; |
| 120 | u32 max_varyings{}; | 124 | u32 max_varyings{}; |
| 125 | u32 max_compute_shared_memory_size{}; | ||
| 121 | bool has_warp_intrinsics{}; | 126 | bool has_warp_intrinsics{}; |
| 122 | bool has_shader_ballot{}; | 127 | bool has_shader_ballot{}; |
| 123 | bool has_vertex_viewport_layer{}; | 128 | bool has_vertex_viewport_layer{}; |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 2c49aeaac..6a9602ff8 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -602,8 +602,15 @@ private: | |||
| 602 | return; | 602 | return; |
| 603 | } | 603 | } |
| 604 | const auto& info = registry.GetComputeInfo(); | 604 | const auto& info = registry.GetComputeInfo(); |
| 605 | if (const u32 size = info.shared_memory_size_in_words; size > 0) { | 605 | if (u32 size = info.shared_memory_size_in_words * 4; size > 0) { |
| 606 | code.AddLine("shared uint smem[{}];", size); | 606 | const u32 limit = device.GetMaxComputeSharedMemorySize(); |
| 607 | if (size > limit) { | ||
| 608 | LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}", | ||
| 609 | size, limit); | ||
| 610 | size = limit; | ||
| 611 | } | ||
| 612 | |||
| 613 | code.AddLine("shared uint smem[{}];", size / 4); | ||
| 607 | code.AddNewLine(); | 614 | code.AddNewLine(); |
| 608 | } | 615 | } |
| 609 | code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", | 616 | code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", |
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index ae5c21baa..529744f2d 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h | |||
| @@ -122,6 +122,11 @@ public: | |||
| 122 | return properties.limits.maxPushConstantsSize; | 122 | return properties.limits.maxPushConstantsSize; |
| 123 | } | 123 | } |
| 124 | 124 | ||
| 125 | /// Returns the maximum size for shared memory. | ||
| 126 | u32 GetMaxComputeSharedMemorySize() const { | ||
| 127 | return properties.limits.maxComputeSharedMemorySize; | ||
| 128 | } | ||
| 129 | |||
| 125 | /// Returns true if ASTC is natively supported. | 130 | /// Returns true if ASTC is natively supported. |
| 126 | bool IsOptimalAstcSupported() const { | 131 | bool IsOptimalAstcSupported() const { |
| 127 | return is_optimal_astc_supported; | 132 | return is_optimal_astc_supported; |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 97429cc59..694452fd8 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -685,13 +685,19 @@ private: | |||
| 685 | } | 685 | } |
| 686 | t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint); | 686 | t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint); |
| 687 | 687 | ||
| 688 | const u32 smem_size = specialization.shared_memory_size; | 688 | u32 smem_size = specialization.shared_memory_size * 4; |
| 689 | if (smem_size == 0) { | 689 | if (smem_size == 0) { |
| 690 | // Avoid declaring an empty array. | 690 | // Avoid declaring an empty array. |
| 691 | return; | 691 | return; |
| 692 | } | 692 | } |
| 693 | const auto element_count = static_cast<u32>(Common::AlignUp(smem_size, 4) / 4); | 693 | const u32 limit = device.GetMaxComputeSharedMemorySize(); |
| 694 | const Id type_array = TypeArray(t_uint, Constant(t_uint, element_count)); | 694 | if (smem_size > limit) { |
| 695 | LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}", | ||
| 696 | smem_size, limit); | ||
| 697 | smem_size = limit; | ||
| 698 | } | ||
| 699 | |||
| 700 | const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4)); | ||
| 695 | const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array); | 701 | const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array); |
| 696 | Name(type_pointer, "SharedMemory"); | 702 | Name(type_pointer, "SharedMemory"); |
| 697 | 703 | ||