summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2019-11-13 00:25:52 -0300
committerGravatar ReinUsesLisp2019-11-22 21:28:48 -0300
commit287ae2b9e8ea38642a4c8e36f7863d881d4c0e87 (patch)
treefeb26b3520031dfff59e7cf8e85018ab888cc2fa /src
parentgl_shader_cache: Specialize shared memory size (diff)
downloadyuzu-287ae2b9e8ea38642a4c8e36f7863d881d4c0e87.tar.gz
yuzu-287ae2b9e8ea38642a4c8e36f7863d881d4c0e87.tar.xz
yuzu-287ae2b9e8ea38642a4c8e36f7863d881d4c0e87.zip
gl_shader_cache: Specialize local memory size for compute shaders
Local memory size in compute shaders was stubbed with an arbitrary size. This commit specializes local memory size from guest GPU parameters.
Diffstat (limited to 'src')
-rw-r--r--src/video_core/engines/kepler_compute.h7
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp5
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp18
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h16
6 files changed, 32 insertions, 21 deletions
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index bd49c6627..c526287b7 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -178,7 +178,12 @@ public:
178 BitField<24, 5, u32> gpr_alloc; 178 BitField<24, 5, u32> gpr_alloc;
179 }; 179 };
180 180
181 INSERT_PADDING_WORDS(0x11); 181 union {
182 BitField<0, 20, u32> local_crs_alloc;
183 BitField<24, 5, u32> sass_version;
184 };
185
186 INSERT_PADDING_WORDS(0x10);
182 } launch_description{}; 187 } launch_description{};
183 188
184 struct { 189 struct {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ebfe52e6d..d890076f8 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -731,7 +731,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
731 731
732 const auto& launch_desc = system.GPU().KeplerCompute().launch_description; 732 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
733 const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, 733 const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
734 launch_desc.block_dim_z, launch_desc.shared_alloc); 734 launch_desc.block_dim_z, launch_desc.shared_alloc,
735 launch_desc.local_pos_alloc);
735 std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant); 736 std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant);
736 state.draw.program_pipeline = 0; 737 state.draw.program_pipeline = 0;
737 738
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 982c4e23a..b23a982d7 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -329,6 +329,11 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy
329 source += fmt::format("shared uint smem[{}];", 329 source += fmt::format("shared uint smem[{}];",
330 Common::AlignUp(variant.shared_memory_size, 4) / 4); 330 Common::AlignUp(variant.shared_memory_size, 4) / 4);
331 } 331 }
332
333 if (variant.local_memory_size > 0) {
334 source += fmt::format("#define LOCAL_MEMORY_SIZE {}",
335 Common::AlignUp(variant.local_memory_size, 4) / 4);
336 }
332 } 337 }
333 338
334 source += '\n'; 339 source += '\n';
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index fb2ba0905..fe016c05c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -510,10 +510,14 @@ private:
510 } 510 }
511 511
512 void DeclareLocalMemory() { 512 void DeclareLocalMemory() {
513 // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at 513 if (stage == ProgramType::Compute) {
514 // specialization time. 514 code.AddLine("#ifdef LOCAL_MEMORY_SIZE");
515 const u64 local_memory_size = 515 code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory());
516 stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize(); 516 code.AddLine("#endif");
517 return;
518 }
519
520 const u64 local_memory_size = header.GetLocalMemorySize();
517 if (local_memory_size == 0) { 521 if (local_memory_size == 0) {
518 return; 522 return;
519 } 523 }
@@ -851,9 +855,6 @@ private:
851 } 855 }
852 856
853 if (const auto lmem = std::get_if<LmemNode>(&*node)) { 857 if (const auto lmem = std::get_if<LmemNode>(&*node)) {
854 if (stage == ProgramType::Compute) {
855 LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
856 }
857 return { 858 return {
858 fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), 859 fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
859 Type::Uint}; 860 Type::Uint};
@@ -1228,9 +1229,6 @@ private:
1228 } 1229 }
1229 target = std::move(*output); 1230 target = std::move(*output);
1230 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { 1231 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
1231 if (stage == ProgramType::Compute) {
1232 LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
1233 }
1234 target = { 1232 target = {
1235 fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), 1233 fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
1236 Type::Uint}; 1234 Type::Uint};
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index d2bb8502a..5ebcbbbba 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -52,11 +52,11 @@ struct BindlessSamplerKey {
52 Tegra::Engines::SamplerDescriptor sampler{}; 52 Tegra::Engines::SamplerDescriptor sampler{};
53}; 53};
54 54
55constexpr u32 NativeVersion = 8; 55constexpr u32 NativeVersion = 9;
56 56
57// Making sure sizes doesn't change by accident 57// Making sure sizes doesn't change by accident
58static_assert(sizeof(BaseBindings) == 16); 58static_assert(sizeof(BaseBindings) == 16);
59static_assert(sizeof(ProgramVariant) == 32); 59static_assert(sizeof(ProgramVariant) == 36);
60 60
61ShaderCacheVersionHash GetShaderCacheVersionHash() { 61ShaderCacheVersionHash GetShaderCacheVersionHash() {
62 ShaderCacheVersionHash hash{}; 62 ShaderCacheVersionHash hash{};
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 6f8e51364..28689f6c7 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -64,10 +64,10 @@ struct ProgramVariant final {
64 : base_bindings{base_bindings}, primitive_mode{primitive_mode} {} 64 : base_bindings{base_bindings}, primitive_mode{primitive_mode} {}
65 65
66 /// Compute constructor. 66 /// Compute constructor.
67 explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, 67 explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size,
68 u32 shared_memory_size) noexcept 68 u32 local_memory_size) noexcept
69 : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)}, 69 : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)},
70 shared_memory_size{shared_memory_size} {} 70 shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {}
71 71
72 // Graphics specific parameters. 72 // Graphics specific parameters.
73 BaseBindings base_bindings{}; 73 BaseBindings base_bindings{};
@@ -78,12 +78,13 @@ struct ProgramVariant final {
78 u16 block_y{}; 78 u16 block_y{};
79 u16 block_z{}; 79 u16 block_z{};
80 u32 shared_memory_size{}; 80 u32 shared_memory_size{};
81 u32 local_memory_size{};
81 82
82 bool operator==(const ProgramVariant& rhs) const noexcept { 83 bool operator==(const ProgramVariant& rhs) const noexcept {
83 return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z, 84 return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z,
84 shared_memory_size) == std::tie(rhs.base_bindings, rhs.primitive_mode, 85 shared_memory_size, local_memory_size) ==
85 rhs.block_x, rhs.block_y, rhs.block_z, 86 std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y,
86 rhs.shared_memory_size); 87 rhs.block_z, rhs.shared_memory_size, rhs.local_memory_size);
87 } 88 }
88 89
89 bool operator!=(const ProgramVariant& rhs) const noexcept { 90 bool operator!=(const ProgramVariant& rhs) const noexcept {
@@ -133,7 +134,8 @@ struct hash<OpenGL::ProgramVariant> {
133 static_cast<std::size_t>(variant.block_x) ^ 134 static_cast<std::size_t>(variant.block_x) ^
134 (static_cast<std::size_t>(variant.block_y) << 32) ^ 135 (static_cast<std::size_t>(variant.block_y) << 32) ^
135 (static_cast<std::size_t>(variant.block_z) << 48) ^ 136 (static_cast<std::size_t>(variant.block_z) << 48) ^
136 (static_cast<std::size_t>(variant.shared_memory_size) << 16); 137 (static_cast<std::size_t>(variant.shared_memory_size) << 16) ^
138 (static_cast<std::size_t>(variant.local_memory_size) << 36);
137 } 139 }
138}; 140};
139 141