summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: ReinUsesLisp, 2019-11-12 23:26:56 -0300
committer: ReinUsesLisp, 2019-11-22 21:28:47 -0300
commit: 4f5d8e434278cd5999bf21e91f0923d55ec8d52b (patch)
tree: 542e261e7169cb9ba8ed129a53e0bbd181868752 /src
parent: shader/texture: Handle TLDS texture type mismatches (diff)
download: yuzu-4f5d8e434278cd5999bf21e91f0923d55ec8d52b.tar.gz
yuzu-4f5d8e434278cd5999bf21e91f0923d55ec8d52b.tar.xz
yuzu-4f5d8e434278cd5999bf21e91f0923d55ec8d52b.zip
gl_shader_cache: Specialize shader workgroup
Drop the usage of ARB_compute_variable_group_size and specialize compute shaders instead. This permits compute to run on AMD and Intel proprietary drivers.
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/engines/kepler_compute.h 2
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 21
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp 63
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.h 6
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_disk_cache.cpp 4
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_disk_cache.h 46
6 files changed, 74 insertions, 68 deletions
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 5259d92bd..bd49c6627 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -140,7 +140,7 @@ public:
140 140
141 INSERT_PADDING_WORDS(0x3); 141 INSERT_PADDING_WORDS(0x3);
142 142
143 BitField<0, 16, u32> shared_alloc; 143 BitField<0, 18, u32> shared_alloc;
144 144
145 BitField<16, 16, u32> block_dim_x; 145 BitField<16, 16, u32> block_dim_x;
146 union { 146 union {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index b76de71ec..bd4e5f6e3 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -273,8 +273,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
273 SetupDrawGlobalMemory(stage, shader); 273 SetupDrawGlobalMemory(stage, shader);
274 SetupDrawTextures(stage, shader, base_bindings); 274 SetupDrawTextures(stage, shader, base_bindings);
275 275
276 const ProgramVariant variant{base_bindings, primitive_mode}; 276 const ProgramVariant variant(base_bindings, primitive_mode);
277 const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant); 277 const auto [program_handle, next_bindings] = shader->GetHandle(variant);
278 278
279 switch (program) { 279 switch (program) {
280 case Maxwell::ShaderProgram::VertexA: 280 case Maxwell::ShaderProgram::VertexA:
@@ -725,18 +725,14 @@ bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) {
725} 725}
726 726
727void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { 727void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
728 if (!GLAD_GL_ARB_compute_variable_group_size) {
729 LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the "
730 "lack of GL_ARB_compute_variable_group_size");
731 return;
732 }
733
734 auto kernel = shader_cache.GetComputeKernel(code_addr); 728 auto kernel = shader_cache.GetComputeKernel(code_addr);
735 SetupComputeTextures(kernel); 729 SetupComputeTextures(kernel);
736 SetupComputeImages(kernel); 730 SetupComputeImages(kernel);
737 731
738 const auto [program, next_bindings] = kernel->GetProgramHandle({}); 732 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
739 state.draw.shader_program = program; 733 const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
734 launch_desc.block_dim_z);
735 std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant);
740 state.draw.program_pipeline = 0; 736 state.draw.program_pipeline = 0;
741 737
742 const std::size_t buffer_size = 738 const std::size_t buffer_size =
@@ -760,10 +756,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
760 state.ApplyShaderProgram(); 756 state.ApplyShaderProgram();
761 state.ApplyProgramPipeline(); 757 state.ApplyProgramPipeline();
762 758
763 const auto& launch_desc = system.GPU().KeplerCompute().launch_description; 759 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
764 glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y,
765 launch_desc.grid_dim_z, launch_desc.block_dim_x,
766 launch_desc.block_dim_y, launch_desc.block_dim_z);
767} 760}
768 761
769void RasterizerOpenGL::FlushAll() {} 762void RasterizerOpenGL::FlushAll() {}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 7ce06a978..a5789b6d3 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -255,7 +255,7 @@ void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
255 255
256CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramType program_type, 256CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramType program_type,
257 const ProgramCode& program_code, const ProgramCode& program_code_b, 257 const ProgramCode& program_code, const ProgramCode& program_code_b,
258 const ProgramVariant& variant, ConstBufferLocker& locker, 258 ConstBufferLocker& locker, const ProgramVariant& variant,
259 bool hint_retrievable = false) { 259 bool hint_retrievable = false) {
260 LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, program_type)); 260 LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, program_type));
261 261
@@ -268,17 +268,11 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy
268 } 268 }
269 const auto entries = GLShader::GetEntries(ir); 269 const auto entries = GLShader::GetEntries(ir);
270 270
271 auto base_bindings{variant.base_bindings};
272 const auto primitive_mode{variant.primitive_mode};
273
274 std::string source = fmt::format(R"(// {} 271 std::string source = fmt::format(R"(// {}
275#version 430 core 272#version 430 core
276#extension GL_ARB_separate_shader_objects : enable 273#extension GL_ARB_separate_shader_objects : enable
277)", 274)",
278 GetShaderId(unique_identifier, program_type)); 275 GetShaderId(unique_identifier, program_type));
279 if (is_compute) {
280 source += "#extension GL_ARB_compute_variable_group_size : require\n";
281 }
282 if (device.HasShaderBallot()) { 276 if (device.HasShaderBallot()) {
283 source += "#extension GL_ARB_shader_ballot : require\n"; 277 source += "#extension GL_ARB_shader_ballot : require\n";
284 } 278 }
@@ -295,6 +289,7 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy
295 } 289 }
296 source += '\n'; 290 source += '\n';
297 291
292 auto base_bindings = variant.base_bindings;
298 if (!is_compute) { 293 if (!is_compute) {
299 source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); 294 source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
300 } 295 }
@@ -318,13 +313,15 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy
318 313
319 if (program_type == ProgramType::Geometry) { 314 if (program_type == ProgramType::Geometry) {
320 const auto [glsl_topology, debug_name, max_vertices] = 315 const auto [glsl_topology, debug_name, max_vertices] =
321 GetPrimitiveDescription(primitive_mode); 316 GetPrimitiveDescription(variant.primitive_mode);
322 317
323 source += "layout (" + std::string(glsl_topology) + ") in;\n\n"; 318 source += fmt::format("layout ({}) in;\n\n", glsl_topology);
324 source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; 319 source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices);
325 } 320 }
326 if (program_type == ProgramType::Compute) { 321 if (program_type == ProgramType::Compute) {
327 source += "layout (local_size_variable) in;\n"; 322 source +=
323 fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n",
324 variant.block_x, variant.block_y, variant.block_z);
328 } 325 }
329 326
330 source += '\n'; 327 source += '\n';
@@ -422,58 +419,53 @@ Shader CachedShader::CreateFromCache(const ShaderParameters& params,
422 unspecialized.code_b)); 419 unspecialized.code_b));
423} 420}
424 421
425std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { 422std::tuple<GLuint, BaseBindings> CachedShader::GetHandle(const ProgramVariant& variant) {
426 UpdateVariant(); 423 EnsureValidLockerVariant();
427 424
428 const auto [entry, is_cache_miss] = curr_variant->programs.try_emplace(variant); 425 const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant);
429 auto& program = entry->second; 426 auto& program = entry->second;
430 if (is_cache_miss) { 427 if (is_cache_miss) {
431 program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b, 428 program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b,
432 variant, *curr_variant->locker); 429 *curr_locker_variant->locker, variant);
433 disk_cache.SaveUsage(GetUsage(variant, *curr_variant->locker)); 430 disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker));
434 431
435 LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); 432 LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
436 } 433 }
437 434
438 auto base_bindings = variant.base_bindings; 435 auto base_bindings = variant.base_bindings;
439 base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()); 436 base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size());
440 if (program_type != ProgramType::Compute) { 437 base_bindings.cbuf += STAGE_RESERVED_UBOS;
441 base_bindings.cbuf += STAGE_RESERVED_UBOS;
442 }
443 base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); 438 base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
444 base_bindings.sampler += static_cast<u32>(entries.samplers.size()); 439 base_bindings.sampler += static_cast<u32>(entries.samplers.size());
445 440
446 return {program->handle, base_bindings}; 441 return {program->handle, base_bindings};
447} 442}
448 443
449void CachedShader::UpdateVariant() { 444bool CachedShader::EnsureValidLockerVariant() {
450 if (curr_variant && !curr_variant->locker->IsConsistent()) { 445 const auto previous_variant = curr_locker_variant;
451 curr_variant = nullptr; 446 if (curr_locker_variant && !curr_locker_variant->locker->IsConsistent()) {
447 curr_locker_variant = nullptr;
452 } 448 }
453 if (!curr_variant) { 449 if (!curr_locker_variant) {
454 for (auto& variant : locker_variants) { 450 for (auto& variant : locker_variants) {
455 if (variant->locker->IsConsistent()) { 451 if (variant->locker->IsConsistent()) {
456 curr_variant = variant.get(); 452 curr_locker_variant = variant.get();
457 } 453 }
458 } 454 }
459 } 455 }
460 if (!curr_variant) { 456 if (!curr_locker_variant) {
461 auto& new_variant = locker_variants.emplace_back(); 457 auto& new_variant = locker_variants.emplace_back();
462 new_variant = std::make_unique<LockerVariant>(); 458 new_variant = std::make_unique<LockerVariant>();
463 new_variant->locker = MakeLocker(system, program_type); 459 new_variant->locker = MakeLocker(system, program_type);
464 curr_variant = new_variant.get(); 460 curr_locker_variant = new_variant.get();
465 } 461 }
462 return previous_variant == curr_locker_variant;
466} 463}
467 464
468ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, 465ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant,
469 const ConstBufferLocker& locker) const { 466 const ConstBufferLocker& locker) const {
470 ShaderDiskCacheUsage usage; 467 return ShaderDiskCacheUsage{unique_identifier, variant, locker.GetKeys(),
471 usage.unique_identifier = unique_identifier; 468 locker.GetBoundSamplers(), locker.GetBindlessSamplers()};
472 usage.variant = variant;
473 usage.keys = locker.GetKeys();
474 usage.bound_samplers = locker.GetBoundSamplers();
475 usage.bindless_samplers = locker.GetBindlessSamplers();
476 return usage;
477} 469}
478 470
479ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, 471ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -534,9 +526,10 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
534 if (!shader) { 526 if (!shader) {
535 auto locker{MakeLocker(system, unspecialized.program_type)}; 527 auto locker{MakeLocker(system, unspecialized.program_type)};
536 FillLocker(*locker, usage); 528 FillLocker(*locker, usage);
529
537 shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type, 530 shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type,
538 unspecialized.code, unspecialized.code_b, usage.variant, 531 unspecialized.code, unspecialized.code_b, *locker,
539 *locker, true); 532 usage.variant, true);
540 } 533 }
541 534
542 std::scoped_lock lock{mutex}; 535 std::scoped_lock lock{mutex};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 6bd7c9cf1..795b05a19 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -86,7 +86,7 @@ public:
86 } 86 }
87 87
88 /// Gets the GL program handle for the shader 88 /// Gets the GL program handle for the shader
89 std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); 89 std::tuple<GLuint, BaseBindings> GetHandle(const ProgramVariant& variant);
90 90
91private: 91private:
92 struct LockerVariant { 92 struct LockerVariant {
@@ -98,7 +98,7 @@ private:
98 GLShader::ShaderEntries entries, ProgramCode program_code, 98 GLShader::ShaderEntries entries, ProgramCode program_code,
99 ProgramCode program_code_b); 99 ProgramCode program_code_b);
100 100
101 void UpdateVariant(); 101 bool EnsureValidLockerVariant();
102 102
103 ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant, 103 ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant,
104 const VideoCommon::Shader::ConstBufferLocker& locker) const; 104 const VideoCommon::Shader::ConstBufferLocker& locker) const;
@@ -117,7 +117,7 @@ private:
117 ProgramCode program_code; 117 ProgramCode program_code;
118 ProgramCode program_code_b; 118 ProgramCode program_code_b;
119 119
120 LockerVariant* curr_variant = nullptr; 120 LockerVariant* curr_locker_variant = nullptr;
121 std::vector<std::unique_ptr<LockerVariant>> locker_variants; 121 std::vector<std::unique_ptr<LockerVariant>> locker_variants;
122}; 122};
123 123
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 3f4daf28d..9156f180a 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -52,11 +52,11 @@ struct BindlessSamplerKey {
52 Tegra::Engines::SamplerDescriptor sampler{}; 52 Tegra::Engines::SamplerDescriptor sampler{};
53}; 53};
54 54
55constexpr u32 NativeVersion = 6; 55constexpr u32 NativeVersion = 7;
56 56
57// Making sure sizes doesn't change by accident 57// Making sure sizes doesn't change by accident
58static_assert(sizeof(BaseBindings) == 16); 58static_assert(sizeof(BaseBindings) == 16);
59static_assert(sizeof(ProgramVariant) == 20); 59static_assert(sizeof(ProgramVariant) == 28);
60 60
61ShaderCacheVersionHash GetShaderCacheVersionHash() { 61ShaderCacheVersionHash GetShaderCacheVersionHash() {
62 ShaderCacheVersionHash hash{}; 62 ShaderCacheVersionHash hash{};
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 55311dc6d..4c7ca004d 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -44,32 +44,49 @@ struct BaseBindings {
44 u32 sampler{}; 44 u32 sampler{};
45 u32 image{}; 45 u32 image{};
46 46
47 bool operator==(const BaseBindings& rhs) const { 47 bool operator==(const BaseBindings& rhs) const noexcept {
48 return std::tie(cbuf, gmem, sampler, image) == 48 return std::tie(cbuf, gmem, sampler, image) ==
49 std::tie(rhs.cbuf, rhs.gmem, rhs.sampler, rhs.image); 49 std::tie(rhs.cbuf, rhs.gmem, rhs.sampler, rhs.image);
50 } 50 }
51 51
52 bool operator!=(const BaseBindings& rhs) const { 52 bool operator!=(const BaseBindings& rhs) const noexcept {
53 return !operator==(rhs); 53 return !operator==(rhs);
54 } 54 }
55}; 55};
56static_assert(std::is_trivially_copyable_v<BaseBindings>); 56static_assert(std::is_trivially_copyable_v<BaseBindings>);
57 57
58/// Describes the different variants a single program can be compiled. 58/// Describes the different variants a program can be compiled with.
59struct ProgramVariant { 59struct ProgramVariant final {
60 BaseBindings base_bindings; 60 ProgramVariant() = default;
61
62 /// Graphics constructor.
63 explicit constexpr ProgramVariant(BaseBindings base_bindings, GLenum primitive_mode) noexcept
64 : base_bindings{base_bindings}, primitive_mode{primitive_mode} {}
65
66 /// Compute constructor.
67 explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z) noexcept
68 : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)} {
69 }
70
71 // Graphics specific parameters.
72 BaseBindings base_bindings{};
61 GLenum primitive_mode{}; 73 GLenum primitive_mode{};
62 74
63 bool operator==(const ProgramVariant& rhs) const { 75 // Compute specific parameters.
64 return std::tie(base_bindings, primitive_mode) == 76 u32 block_x{};
65 std::tie(rhs.base_bindings, rhs.primitive_mode); 77 u16 block_y{};
78 u16 block_z{};
79
80 bool operator==(const ProgramVariant& rhs) const noexcept {
81 return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z) ==
82 std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y,
83 rhs.block_z);
66 } 84 }
67 85
68 bool operator!=(const ProgramVariant& rhs) const { 86 bool operator!=(const ProgramVariant& rhs) const noexcept {
69 return !operator==(rhs); 87 return !operator==(rhs);
70 } 88 }
71}; 89};
72
73static_assert(std::is_trivially_copyable_v<ProgramVariant>); 90static_assert(std::is_trivially_copyable_v<ProgramVariant>);
74 91
75/// Describes how a shader is used. 92/// Describes how a shader is used.
@@ -108,8 +125,11 @@ struct hash<OpenGL::BaseBindings> {
108template <> 125template <>
109struct hash<OpenGL::ProgramVariant> { 126struct hash<OpenGL::ProgramVariant> {
110 std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept { 127 std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept {
111 return std::hash<OpenGL::BaseBindings>()(variant.base_bindings) ^ 128 return std::hash<OpenGL::BaseBindings>{}(variant.base_bindings) ^
112 (static_cast<std::size_t>(variant.primitive_mode) << 6); 129 (static_cast<std::size_t>(variant.primitive_mode) << 6) ^
130 static_cast<std::size_t>(variant.block_x) ^
131 (static_cast<std::size_t>(variant.block_y) << 32) ^
132 (static_cast<std::size_t>(variant.block_z) << 48);
113 } 133 }
114}; 134};
115 135
@@ -117,7 +137,7 @@ template <>
117struct hash<OpenGL::ShaderDiskCacheUsage> { 137struct hash<OpenGL::ShaderDiskCacheUsage> {
118 std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept { 138 std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {
119 return static_cast<std::size_t>(usage.unique_identifier) ^ 139 return static_cast<std::size_t>(usage.unique_identifier) ^
120 std::hash<OpenGL::ProgramVariant>()(usage.variant); 140 std::hash<OpenGL::ProgramVariant>{}(usage.variant);
121 } 141 }
122}; 142};
123 143