summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2020-07-16 16:02:46 -0300
committerGravatar ReinUsesLisp2020-07-16 16:02:46 -0300
commita5a72cbd201489319856baaec88f02a6607423ae (patch)
treea260784d8810d6c3f982d77e7c7ebaa5dbf957c7
parentMerge pull request #4342 from lioncash/endian (diff)
downloadyuzu-a5a72cbd201489319856baaec88f02a6607423ae.tar.gz
yuzu-a5a72cbd201489319856baaec88f02a6607423ae.tar.xz
yuzu-a5a72cbd201489319856baaec88f02a6607423ae.zip
renderer_{opengl,vulkan}: Clamp shared memory to host's limit
This stops shaders from failing to build when the exceed host's shared memory size limit. An error is logged.
Diffstat (limited to '')
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.cpp16
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_device.h5
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp11
-rw-r--r--src/video_core/renderer_vulkan/vk_device.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp12
6 files changed, 42 insertions, 9 deletions
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
index eb5158407..31dd08fd2 100644
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@@ -892,11 +892,19 @@ void ARBDecompiler::DeclareCompute() {
892 const ComputeInfo& info = registry.GetComputeInfo(); 892 const ComputeInfo& info = registry.GetComputeInfo();
893 AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], 893 AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1],
894 info.workgroup_size[2]); 894 info.workgroup_size[2]);
895 if (info.shared_memory_size_in_words > 0) { 895 if (info.shared_memory_size_in_words == 0) {
896 const u32 size_in_bytes = info.shared_memory_size_in_words * 4; 896 return;
897 AddLine("SHARED_MEMORY {};", size_in_bytes); 897 }
898 AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); 898 const u32 limit = device.GetMaxComputeSharedMemorySize();
899 u32 size_in_bytes = info.shared_memory_size_in_words * 4;
900 if (size_in_bytes > limit) {
901 LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
902 size_in_bytes, limit);
903 size_in_bytes = limit;
899 } 904 }
905
906 AddLine("SHARED_MEMORY {};", size_in_bytes);
907 AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
900} 908}
901 909
902void ARBDecompiler::DeclareInputAttributes() { 910void ARBDecompiler::DeclareInputAttributes() {
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index c1f20f0ab..b97c9da15 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -212,6 +212,7 @@ Device::Device()
212 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); 212 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
213 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); 213 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
214 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); 214 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
215 max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
215 has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && 216 has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
216 GLAD_GL_NV_shader_thread_shuffle; 217 GLAD_GL_NV_shader_thread_shuffle;
217 has_shader_ballot = GLAD_GL_ARB_shader_ballot; 218 has_shader_ballot = GLAD_GL_ARB_shader_ballot;
@@ -248,6 +249,7 @@ Device::Device(std::nullptr_t) {
248 shader_storage_alignment = 4; 249 shader_storage_alignment = 4;
249 max_vertex_attributes = 16; 250 max_vertex_attributes = 16;
250 max_varyings = 15; 251 max_varyings = 15;
252 max_compute_shared_memory_size = 0x10000;
251 has_warp_intrinsics = true; 253 has_warp_intrinsics = true;
252 has_shader_ballot = true; 254 has_shader_ballot = true;
253 has_vertex_viewport_layer = true; 255 has_vertex_viewport_layer = true;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index e1d811966..e71344978 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -52,6 +52,10 @@ public:
52 return max_varyings; 52 return max_varyings;
53 } 53 }
54 54
55 u32 GetMaxComputeSharedMemorySize() const {
56 return max_compute_shared_memory_size;
57 }
58
55 bool HasWarpIntrinsics() const { 59 bool HasWarpIntrinsics() const {
56 return has_warp_intrinsics; 60 return has_warp_intrinsics;
57 } 61 }
@@ -114,6 +118,7 @@ private:
114 std::size_t shader_storage_alignment{}; 118 std::size_t shader_storage_alignment{};
115 u32 max_vertex_attributes{}; 119 u32 max_vertex_attributes{};
116 u32 max_varyings{}; 120 u32 max_varyings{};
121 u32 max_compute_shared_memory_size{};
117 bool has_warp_intrinsics{}; 122 bool has_warp_intrinsics{};
118 bool has_shader_ballot{}; 123 bool has_shader_ballot{};
119 bool has_vertex_viewport_layer{}; 124 bool has_vertex_viewport_layer{};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 2c49aeaac..6a9602ff8 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -602,8 +602,15 @@ private:
602 return; 602 return;
603 } 603 }
604 const auto& info = registry.GetComputeInfo(); 604 const auto& info = registry.GetComputeInfo();
605 if (const u32 size = info.shared_memory_size_in_words; size > 0) { 605 if (u32 size = info.shared_memory_size_in_words * 4; size > 0) {
606 code.AddLine("shared uint smem[{}];", size); 606 const u32 limit = device.GetMaxComputeSharedMemorySize();
607 if (size > limit) {
608 LOG_ERROR(Render_OpenGL, "Shared memory size {} is clamped to host's limit {}",
609 size, limit);
610 size = limit;
611 }
612
613 code.AddLine("shared uint smem[{}];", size / 4);
607 code.AddNewLine(); 614 code.AddNewLine();
608 } 615 }
609 code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", 616 code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index ae5c21baa..529744f2d 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -122,6 +122,11 @@ public:
122 return properties.limits.maxPushConstantsSize; 122 return properties.limits.maxPushConstantsSize;
123 } 123 }
124 124
125 /// Returns the maximum size for shared memory.
126 u32 GetMaxComputeSharedMemorySize() const {
127 return properties.limits.maxComputeSharedMemorySize;
128 }
129
125 /// Returns true if ASTC is natively supported. 130 /// Returns true if ASTC is natively supported.
126 bool IsOptimalAstcSupported() const { 131 bool IsOptimalAstcSupported() const {
127 return is_optimal_astc_supported; 132 return is_optimal_astc_supported;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 97429cc59..694452fd8 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -685,13 +685,19 @@ private:
685 } 685 }
686 t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint); 686 t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint);
687 687
688 const u32 smem_size = specialization.shared_memory_size; 688 u32 smem_size = specialization.shared_memory_size * 4;
689 if (smem_size == 0) { 689 if (smem_size == 0) {
690 // Avoid declaring an empty array. 690 // Avoid declaring an empty array.
691 return; 691 return;
692 } 692 }
693 const auto element_count = static_cast<u32>(Common::AlignUp(smem_size, 4) / 4); 693 const u32 limit = device.GetMaxComputeSharedMemorySize();
694 const Id type_array = TypeArray(t_uint, Constant(t_uint, element_count)); 694 if (smem_size > limit) {
695 LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}",
696 smem_size, limit);
697 smem_size = limit;
698 }
699
700 const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4));
695 const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array); 701 const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array);
696 Name(type_pointer, "SharedMemory"); 702 Name(type_pointer, "SharedMemory");
697 703