diff options
| author | 2019-11-12 23:39:45 -0300 | |
|---|---|---|
| committer | 2019-11-22 21:28:47 -0300 | |
| commit | dbeb52387979c7e28c0acb03dfc1468146947104 (patch) | |
| tree | 8c8e681dcc11a137517839dd64d839541cb6f9ce /src/video_core | |
| parent | gl_shader_cache: Specialize shader workgroup (diff) | |
| download | yuzu-dbeb52387979c7e28c0acb03dfc1468146947104.tar.gz yuzu-dbeb52387979c7e28c0acb03dfc1468146947104.tar.xz yuzu-dbeb52387979c7e28c0acb03dfc1468146947104.zip | |
gl_shader_cache: Specialize shared memory size
Shared memory was being declared with an undefined size. Specialize the
compute shader's shared memory size from guest GPU parameters.
Diffstat (limited to 'src/video_core')
5 files changed, 25 insertions, 29 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index bd4e5f6e3..ebfe52e6d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -731,7 +731,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | |||
| 731 | 731 | ||
| 732 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | 732 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; |
| 733 | const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, | 733 | const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, |
| 734 | launch_desc.block_dim_z); | 734 | launch_desc.block_dim_z, launch_desc.shared_alloc); |
| 735 | std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant); | 735 | std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant); |
| 736 | state.draw.program_pipeline = 0; | 736 | state.draw.program_pipeline = 0; |
| 737 | 737 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index a5789b6d3..982c4e23a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -8,7 +8,9 @@ | |||
| 8 | #include <thread> | 8 | #include <thread> |
| 9 | #include <unordered_set> | 9 | #include <unordered_set> |
| 10 | #include <boost/functional/hash.hpp> | 10 | #include <boost/functional/hash.hpp> |
| 11 | #include "common/alignment.h" | ||
| 11 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| 13 | #include "common/logging/log.h" | ||
| 12 | #include "common/scope_exit.h" | 14 | #include "common/scope_exit.h" |
| 13 | #include "core/core.h" | 15 | #include "core/core.h" |
| 14 | #include "core/frontend/emu_window.h" | 16 | #include "core/frontend/emu_window.h" |
| @@ -322,6 +324,11 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy | |||
| 322 | source += | 324 | source += |
| 323 | fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n", | 325 | fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n", |
| 324 | variant.block_x, variant.block_y, variant.block_z); | 326 | variant.block_x, variant.block_y, variant.block_z); |
| 327 | |||
| 328 | if (variant.shared_memory_size > 0) { | ||
| 329 | source += fmt::format("shared uint smem[{}];", | ||
| 330 | Common::AlignUp(variant.shared_memory_size, 4) / 4); | ||
| 331 | } | ||
| 325 | } | 332 | } |
| 326 | 333 | ||
| 327 | source += '\n'; | 334 | source += '\n'; |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 51c80bf32..fb2ba0905 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -223,7 +223,7 @@ private: | |||
| 223 | Type type{}; | 223 | Type type{}; |
| 224 | }; | 224 | }; |
| 225 | 225 | ||
| 226 | constexpr const char* GetTypeString(Type type) { | 226 | const char* GetTypeString(Type type) { |
| 227 | switch (type) { | 227 | switch (type) { |
| 228 | case Type::Bool: | 228 | case Type::Bool: |
| 229 | return "bool"; | 229 | return "bool"; |
| @@ -243,7 +243,7 @@ constexpr const char* GetTypeString(Type type) { | |||
| 243 | } | 243 | } |
| 244 | } | 244 | } |
| 245 | 245 | ||
| 246 | constexpr const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { | 246 | const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { |
| 247 | switch (image_type) { | 247 | switch (image_type) { |
| 248 | case Tegra::Shader::ImageType::Texture1D: | 248 | case Tegra::Shader::ImageType::Texture1D: |
| 249 | return "1D"; | 249 | return "1D"; |
| @@ -522,13 +522,6 @@ private: | |||
| 522 | code.AddNewLine(); | 522 | code.AddNewLine(); |
| 523 | } | 523 | } |
| 524 | 524 | ||
| 525 | void DeclareSharedMemory() { | ||
| 526 | if (stage != ProgramType::Compute) { | ||
| 527 | return; | ||
| 528 | } | ||
| 529 | code.AddLine("shared uint {}[];", GetSharedMemory()); | ||
| 530 | } | ||
| 531 | |||
| 532 | void DeclareInternalFlags() { | 525 | void DeclareInternalFlags() { |
| 533 | for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { | 526 | for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { |
| 534 | const auto flag_code = static_cast<InternalFlag>(flag); | 527 | const auto flag_code = static_cast<InternalFlag>(flag); |
| @@ -867,9 +860,7 @@ private: | |||
| 867 | } | 860 | } |
| 868 | 861 | ||
| 869 | if (const auto smem = std::get_if<SmemNode>(&*node)) { | 862 | if (const auto smem = std::get_if<SmemNode>(&*node)) { |
| 870 | return { | 863 | return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; |
| 871 | fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()), | ||
| 872 | Type::Uint}; | ||
| 873 | } | 864 | } |
| 874 | 865 | ||
| 875 | if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { | 866 | if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { |
| @@ -1245,9 +1236,7 @@ private: | |||
| 1245 | Type::Uint}; | 1236 | Type::Uint}; |
| 1246 | } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { | 1237 | } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { |
| 1247 | ASSERT(stage == ProgramType::Compute); | 1238 | ASSERT(stage == ProgramType::Compute); |
| 1248 | target = { | 1239 | target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; |
| 1249 | fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()), | ||
| 1250 | Type::Uint}; | ||
| 1251 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | 1240 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { |
| 1252 | const std::string real = Visit(gmem->GetRealAddress()).AsUint(); | 1241 | const std::string real = Visit(gmem->GetRealAddress()).AsUint(); |
| 1253 | const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); | 1242 | const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); |
| @@ -2170,10 +2159,6 @@ private: | |||
| 2170 | return "lmem_" + suffix; | 2159 | return "lmem_" + suffix; |
| 2171 | } | 2160 | } |
| 2172 | 2161 | ||
| 2173 | std::string GetSharedMemory() const { | ||
| 2174 | return fmt::format("smem_{}", suffix); | ||
| 2175 | } | ||
| 2176 | |||
| 2177 | std::string GetInternalFlag(InternalFlag flag) const { | 2162 | std::string GetInternalFlag(InternalFlag flag) const { |
| 2178 | constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", | 2163 | constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", |
| 2179 | "overflow_flag"}; | 2164 | "overflow_flag"}; |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 9156f180a..d2bb8502a 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -52,11 +52,11 @@ struct BindlessSamplerKey { | |||
| 52 | Tegra::Engines::SamplerDescriptor sampler{}; | 52 | Tegra::Engines::SamplerDescriptor sampler{}; |
| 53 | }; | 53 | }; |
| 54 | 54 | ||
| 55 | constexpr u32 NativeVersion = 7; | 55 | constexpr u32 NativeVersion = 8; |
| 56 | 56 | ||
| 57 | // Making sure sizes doesn't change by accident | 57 | // Making sure sizes doesn't change by accident |
| 58 | static_assert(sizeof(BaseBindings) == 16); | 58 | static_assert(sizeof(BaseBindings) == 16); |
| 59 | static_assert(sizeof(ProgramVariant) == 28); | 59 | static_assert(sizeof(ProgramVariant) == 32); |
| 60 | 60 | ||
| 61 | ShaderCacheVersionHash GetShaderCacheVersionHash() { | 61 | ShaderCacheVersionHash GetShaderCacheVersionHash() { |
| 62 | ShaderCacheVersionHash hash{}; | 62 | ShaderCacheVersionHash hash{}; |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 4c7ca004d..6f8e51364 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h | |||
| @@ -64,9 +64,10 @@ struct ProgramVariant final { | |||
| 64 | : base_bindings{base_bindings}, primitive_mode{primitive_mode} {} | 64 | : base_bindings{base_bindings}, primitive_mode{primitive_mode} {} |
| 65 | 65 | ||
| 66 | /// Compute constructor. | 66 | /// Compute constructor. |
| 67 | explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z) noexcept | 67 | explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, |
| 68 | : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)} { | 68 | u32 shared_memory_size) noexcept |
| 69 | } | 69 | : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)}, |
| 70 | shared_memory_size{shared_memory_size} {} | ||
| 70 | 71 | ||
| 71 | // Graphics specific parameters. | 72 | // Graphics specific parameters. |
| 72 | BaseBindings base_bindings{}; | 73 | BaseBindings base_bindings{}; |
| @@ -76,11 +77,13 @@ struct ProgramVariant final { | |||
| 76 | u32 block_x{}; | 77 | u32 block_x{}; |
| 77 | u16 block_y{}; | 78 | u16 block_y{}; |
| 78 | u16 block_z{}; | 79 | u16 block_z{}; |
| 80 | u32 shared_memory_size{}; | ||
| 79 | 81 | ||
| 80 | bool operator==(const ProgramVariant& rhs) const noexcept { | 82 | bool operator==(const ProgramVariant& rhs) const noexcept { |
| 81 | return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z) == | 83 | return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z, |
| 82 | std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y, | 84 | shared_memory_size) == std::tie(rhs.base_bindings, rhs.primitive_mode, |
| 83 | rhs.block_z); | 85 | rhs.block_x, rhs.block_y, rhs.block_z, |
| 86 | rhs.shared_memory_size); | ||
| 84 | } | 87 | } |
| 85 | 88 | ||
| 86 | bool operator!=(const ProgramVariant& rhs) const noexcept { | 89 | bool operator!=(const ProgramVariant& rhs) const noexcept { |
| @@ -129,7 +132,8 @@ struct hash<OpenGL::ProgramVariant> { | |||
| 129 | (static_cast<std::size_t>(variant.primitive_mode) << 6) ^ | 132 | (static_cast<std::size_t>(variant.primitive_mode) << 6) ^ |
| 130 | static_cast<std::size_t>(variant.block_x) ^ | 133 | static_cast<std::size_t>(variant.block_x) ^ |
| 131 | (static_cast<std::size_t>(variant.block_y) << 32) ^ | 134 | (static_cast<std::size_t>(variant.block_y) << 32) ^ |
| 132 | (static_cast<std::size_t>(variant.block_z) << 48); | 135 | (static_cast<std::size_t>(variant.block_z) << 48) ^ |
| 136 | (static_cast<std::size_t>(variant.shared_memory_size) << 16); | ||
| 133 | } | 137 | } |
| 134 | }; | 138 | }; |
| 135 | 139 | ||