summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp28
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp84
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp12
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h4
-rw-r--r--src/video_core/shader/registry.cpp59
-rw-r--r--src/video_core/shader/registry.h49
-rw-r--r--src/video_core/shader/track.cpp9
8 files changed, 176 insertions, 75 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 87d25b5a5..72a5dc82a 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -166,8 +166,9 @@ std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
166 166
167std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { 167std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
168 const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; 168 const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
169 auto registry = std::make_shared<Registry>(entry.type, guest_profile); 169 const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
170 registry->SetBoundBuffer(entry.bound_buffer); 170 entry.graphics_info, entry.compute_info};
171 const auto registry = std::make_shared<Registry>(entry.type, info);
171 for (const auto& [address, value] : entry.keys) { 172 for (const auto& [address, value] : entry.keys) {
172 const auto [buffer, offset] = address; 173 const auto [buffer, offset] = address;
173 registry->InsertKey(buffer, offset, value); 174 registry->InsertKey(buffer, offset, value);
@@ -184,9 +185,9 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
184 185
185std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type, 186std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type,
186 u64 unique_identifier, const ShaderIR& ir, 187 u64 unique_identifier, const ShaderIR& ir,
187 bool hint_retrievable = false) { 188 const Registry& registry, bool hint_retrievable = false) {
188 LOG_INFO(Render_OpenGL, "{}", MakeShaderID(unique_identifier, shader_type)); 189 LOG_INFO(Render_OpenGL, "{}", MakeShaderID(unique_identifier, shader_type));
189 const std::string glsl = DecompileShader(device, ir, shader_type); 190 const std::string glsl = DecompileShader(device, ir, registry, shader_type);
190 OGLShader shader; 191 OGLShader shader;
191 shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); 192 shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
192 193
@@ -239,7 +240,7 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
239 // if (!code_b.empty()) { 240 // if (!code_b.empty()) {
240 // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); 241 // ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
241 // } 242 // }
242 auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir); 243 auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
243 244
244 ShaderDiskCacheEntry entry; 245 ShaderDiskCacheEntry entry;
245 entry.type = shader_type; 246 entry.type = shader_type;
@@ -247,6 +248,7 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
247 entry.code_b = std::move(code_b); 248 entry.code_b = std::move(code_b);
248 entry.unique_identifier = params.unique_identifier; 249 entry.unique_identifier = params.unique_identifier;
249 entry.bound_buffer = registry->GetBoundBuffer(); 250 entry.bound_buffer = registry->GetBoundBuffer();
251 entry.graphics_info = registry->GetGraphicsInfo();
250 entry.keys = registry->GetKeys(); 252 entry.keys = registry->GetKeys();
251 entry.bound_samplers = registry->GetBoundSamplers(); 253 entry.bound_samplers = registry->GetBoundSamplers();
252 entry.bindless_samplers = registry->GetBindlessSamplers(); 254 entry.bindless_samplers = registry->GetBindlessSamplers();
@@ -260,16 +262,18 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
260Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { 262Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
261 const std::size_t size_in_bytes = code.size() * sizeof(u64); 263 const std::size_t size_in_bytes = code.size() * sizeof(u64);
262 264
263 auto registry = 265 auto& engine = params.system.GPU().KeplerCompute();
264 std::make_shared<Registry>(ShaderType::Compute, params.system.GPU().KeplerCompute()); 266 auto registry = std::make_shared<Registry>(ShaderType::Compute, engine);
265 const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); 267 const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
266 auto program = BuildShader(params.device, ShaderType::Compute, params.unique_identifier, ir); 268 const u64 uid = params.unique_identifier;
269 auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
267 270
268 ShaderDiskCacheEntry entry; 271 ShaderDiskCacheEntry entry;
269 entry.type = ShaderType::Compute; 272 entry.type = ShaderType::Compute;
270 entry.code = std::move(code); 273 entry.code = std::move(code);
271 entry.unique_identifier = params.unique_identifier; 274 entry.unique_identifier = uid;
272 entry.bound_buffer = registry->GetBoundBuffer(); 275 entry.bound_buffer = registry->GetBoundBuffer();
276 entry.compute_info = registry->GetComputeInfo();
273 entry.keys = registry->GetKeys(); 277 entry.keys = registry->GetKeys();
274 entry.bound_samplers = registry->GetBoundSamplers(); 278 entry.bound_samplers = registry->GetBoundSamplers();
275 entry.bindless_samplers = registry->GetBindlessSamplers(); 279 entry.bindless_samplers = registry->GetBindlessSamplers();
@@ -331,8 +335,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
331 return; 335 return;
332 } 336 }
333 const auto& entry = (*transferable)[i]; 337 const auto& entry = (*transferable)[i];
334 const u64 unique_identifier = entry.unique_identifier; 338 const u64 uid = entry.unique_identifier;
335 const auto it = find_precompiled(unique_identifier); 339 const auto it = find_precompiled(uid);
336 const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; 340 const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr;
337 341
338 const bool is_compute = entry.type == ShaderType::Compute; 342 const bool is_compute = entry.type == ShaderType::Compute;
@@ -350,7 +354,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
350 } 354 }
351 if (!program) { 355 if (!program) {
352 // Otherwise compile it from GLSL 356 // Otherwise compile it from GLSL
353 program = BuildShader(device, entry.type, unique_identifier, ir, true); 357 program = BuildShader(device, entry.type, uid, ir, *registry, true);
354 } 358 }
355 359
356 PrecompiledShader shader; 360 PrecompiledShader shader;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 308e57aae..48a25f1f8 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -36,6 +36,7 @@ using Tegra::Shader::IpaInterpMode;
36using Tegra::Shader::IpaMode; 36using Tegra::Shader::IpaMode;
37using Tegra::Shader::IpaSampleMode; 37using Tegra::Shader::IpaSampleMode;
38using Tegra::Shader::Register; 38using Tegra::Shader::Register;
39using VideoCommon::Shader::Registry;
39 40
40using namespace std::string_literals; 41using namespace std::string_literals;
41using namespace VideoCommon::Shader; 42using namespace VideoCommon::Shader;
@@ -288,6 +289,30 @@ const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
288 } 289 }
289} 290}
290 291
292/// Describes primitive behavior on geometry shaders
293std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) {
294 switch (topology) {
295 case Maxwell::PrimitiveTopology::Points:
296 return {"points", 1};
297 case Maxwell::PrimitiveTopology::Lines:
298 case Maxwell::PrimitiveTopology::LineStrip:
299 return {"lines", 2};
300 case Maxwell::PrimitiveTopology::LinesAdjacency:
301 case Maxwell::PrimitiveTopology::LineStripAdjacency:
302 return {"lines_adjacency", 4};
303 case Maxwell::PrimitiveTopology::Triangles:
304 case Maxwell::PrimitiveTopology::TriangleStrip:
305 case Maxwell::PrimitiveTopology::TriangleFan:
306 return {"triangles", 3};
307 case Maxwell::PrimitiveTopology::TrianglesAdjacency:
308 case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
309 return {"triangles_adjacency", 6};
310 default:
311 UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
312 return {"points", 1};
313 }
314}
315
291/// Generates code to use for a swizzle operation. 316/// Generates code to use for a swizzle operation.
292constexpr const char* GetSwizzle(std::size_t element) { 317constexpr const char* GetSwizzle(std::size_t element) {
293 constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; 318 constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
@@ -367,15 +392,17 @@ std::string FlowStackTopName(MetaStackClass stack) {
367 392
368class GLSLDecompiler final { 393class GLSLDecompiler final {
369public: 394public:
370 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage, 395 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
371 std::string_view suffix) 396 ShaderType stage, std::string_view suffix)
372 : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} 397 : device{device}, ir{ir}, registry{registry}, stage{stage}, suffix{suffix},
398 header{ir.GetHeader()} {}
373 399
374 void Decompile() { 400 void Decompile() {
375 DeclareHeader(); 401 DeclareHeader();
376 DeclareVertex(); 402 DeclareVertex();
377 DeclareGeometry(); 403 DeclareGeometry();
378 DeclareFragment(); 404 DeclareFragment();
405 DeclareCompute();
379 DeclareRegisters(); 406 DeclareRegisters();
380 DeclareCustomVariables(); 407 DeclareCustomVariables();
381 DeclarePredicates(); 408 DeclarePredicates();
@@ -489,9 +516,15 @@ private:
489 return; 516 return;
490 } 517 }
491 518
519 const auto& info = registry.GetGraphicsInfo();
520 const auto input_topology = info.primitive_topology;
521 const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology);
522 max_input_vertices = max_vertices;
523 code.AddLine("layout ({}) in;", glsl_topology);
524
492 const auto topology = GetTopologyName(header.common3.output_topology); 525 const auto topology = GetTopologyName(header.common3.output_topology);
493 const auto max_vertices = header.common4.max_output_vertices.Value(); 526 const auto max_output_vertices = header.common4.max_output_vertices.Value();
494 code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices); 527 code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices);
495 code.AddNewLine(); 528 code.AddNewLine();
496 529
497 code.AddLine("in gl_PerVertex {{"); 530 code.AddLine("in gl_PerVertex {{");
@@ -513,7 +546,8 @@ private:
513 if (!IsRenderTargetEnabled(render_target)) { 546 if (!IsRenderTargetEnabled(render_target)) {
514 continue; 547 continue;
515 } 548 }
516 code.AddLine("layout (location = {}) out vec4 frag_color{};", render_target, render_target); 549 code.AddLine("layout (location = {}) out vec4 frag_color{};", render_target,
550 render_target);
517 any = true; 551 any = true;
518 } 552 }
519 if (any) { 553 if (any) {
@@ -521,6 +555,20 @@ private:
521 } 555 }
522 } 556 }
523 557
558 void DeclareCompute() {
559 if (stage != ShaderType::Compute) {
560 return;
561 }
562 const auto& info = registry.GetComputeInfo();
563 if (const u32 size = info.shared_memory_size_in_words; size > 0) {
564 code.AddLine("shared uint smem[];", size);
565 code.AddNewLine();
566 }
567 code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
568 info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]);
569 code.AddNewLine();
570 }
571
524 void DeclareVertexRedeclarations() { 572 void DeclareVertexRedeclarations() {
525 code.AddLine("out gl_PerVertex {{"); 573 code.AddLine("out gl_PerVertex {{");
526 ++code.scope; 574 ++code.scope;
@@ -596,18 +644,16 @@ private:
596 } 644 }
597 645
598 void DeclareLocalMemory() { 646 void DeclareLocalMemory() {
647 u64 local_memory_size = 0;
599 if (stage == ShaderType::Compute) { 648 if (stage == ShaderType::Compute) {
600 code.AddLine("#ifdef LOCAL_MEMORY_SIZE"); 649 local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
601 code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory()); 650 } else {
602 code.AddLine("#endif"); 651 local_memory_size = header.GetLocalMemorySize();
603 return;
604 } 652 }
605
606 const u64 local_memory_size = header.GetLocalMemorySize();
607 if (local_memory_size == 0) { 653 if (local_memory_size == 0) {
608 return; 654 return;
609 } 655 }
610 const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; 656 const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4;
611 code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); 657 code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
612 code.AddNewLine(); 658 code.AddNewLine();
613 } 659 }
@@ -996,7 +1042,8 @@ private:
996 // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games 1042 // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
997 // set an 0x80000000 index for those and the shader fails to build. Find out why 1043 // set an 0x80000000 index for those and the shader fails to build. Find out why
998 // this happens and what's its intent. 1044 // this happens and what's its intent.
999 return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint()); 1045 return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(),
1046 max_input_vertices.value());
1000 } 1047 }
1001 return std::string(name); 1048 return std::string(name);
1002 }; 1049 };
@@ -2428,11 +2475,14 @@ private:
2428 2475
2429 const Device& device; 2476 const Device& device;
2430 const ShaderIR& ir; 2477 const ShaderIR& ir;
2478 const Registry& registry;
2431 const ShaderType stage; 2479 const ShaderType stage;
2432 const std::string_view suffix; 2480 const std::string_view suffix;
2433 const Header header; 2481 const Header header;
2434 2482
2435 ShaderWriter code; 2483 ShaderWriter code;
2484
2485 std::optional<u32> max_input_vertices;
2436}; 2486};
2437 2487
2438std::string GetFlowVariable(u32 index) { 2488std::string GetFlowVariable(u32 index) {
@@ -2647,9 +2697,9 @@ ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
2647 return entries; 2697 return entries;
2648} 2698}
2649 2699
2650std::string DecompileShader(const Device& device, const ShaderIR& ir, ShaderType stage, 2700std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry,
2651 std::string_view suffix) { 2701 ShaderType stage, std::string_view suffix) {
2652 GLSLDecompiler decompiler(device, ir, stage, suffix); 2702 GLSLDecompiler decompiler(device, ir, registry, stage, suffix);
2653 decompiler.Decompile(); 2703 decompiler.Decompile();
2654 return decompiler.GetResult(); 2704 return decompiler.GetResult();
2655} 2705}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index ae97ab504..68b68ee77 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -12,12 +12,9 @@
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/engines/maxwell_3d.h" 13#include "video_core/engines/maxwell_3d.h"
14#include "video_core/engines/shader_type.h" 14#include "video_core/engines/shader_type.h"
15#include "video_core/shader/registry.h"
15#include "video_core/shader/shader_ir.h" 16#include "video_core/shader/shader_ir.h"
16 17
17namespace VideoCommon::Shader {
18class ShaderIR;
19}
20
21namespace OpenGL { 18namespace OpenGL {
22 19
23class Device; 20class Device;
@@ -80,6 +77,7 @@ struct ShaderEntries {
80ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir); 77ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir);
81 78
82std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, 79std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
80 const VideoCommon::Shader::Registry& registry,
83 Tegra::Engines::ShaderType stage, std::string_view suffix = {}); 81 Tegra::Engines::ShaderType stage, std::string_view suffix = {});
84 82
85} // namespace OpenGL 83} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 0e1717c5e..5d5118058 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -48,7 +48,7 @@ struct BindlessSamplerKey {
48 Tegra::Engines::SamplerDescriptor sampler; 48 Tegra::Engines::SamplerDescriptor sampler;
49}; 49};
50 50
51constexpr u32 NativeVersion = 16; 51constexpr u32 NativeVersion = 17;
52 52
53ShaderCacheVersionHash GetShaderCacheVersionHash() { 53ShaderCacheVersionHash GetShaderCacheVersionHash() {
54 ShaderCacheVersionHash hash{}; 54 ShaderCacheVersionHash hash{};
@@ -83,15 +83,16 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
83 return false; 83 return false;
84 } 84 }
85 85
86 bool is_texture_handler_size_known; 86 u8 is_texture_handler_size_known;
87 u32 texture_handler_size_value; 87 u32 texture_handler_size_value;
88 u32 num_keys; 88 u32 num_keys;
89 u32 num_bound_samplers; 89 u32 num_bound_samplers;
90 u32 num_bindless_samplers; 90 u32 num_bindless_samplers;
91 if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || 91 if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
92 file.ReadArray(&is_texture_handler_size_known, 1) != 1 || 92 file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
93 file.ReadArray(&texture_handler_size_value, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || 93 file.ReadArray(&texture_handler_size_value, 1) != 1 ||
94 file.ReadArray(&num_bound_samplers, 1) != 1 || 94 file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
95 file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
95 file.ReadArray(&num_bindless_samplers, 1) != 1) { 96 file.ReadArray(&num_bindless_samplers, 1) != 1) {
96 return false; 97 return false;
97 } 98 }
@@ -136,8 +137,9 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
136 } 137 }
137 138
138 if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 || 139 if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 ||
139 file.WriteObject(texture_handler_size.has_value()) != 1 || 140 file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) != 1 ||
140 file.WriteObject(texture_handler_size.value_or(0)) != 1 || 141 file.WriteObject(texture_handler_size.value_or(0)) != 1 ||
142 file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
141 file.WriteObject(static_cast<u32>(keys.size())) != 1 || 143 file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
142 file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || 144 file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
143 file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { 145 file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 7f2ab36be..d5be52e40 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -51,8 +51,10 @@ struct ShaderDiskCacheEntry {
51 ProgramCode code_b; 51 ProgramCode code_b;
52 52
53 u64 unique_identifier = 0; 53 u64 unique_identifier = 0;
54 u32 bound_buffer = 0;
55 std::optional<u32> texture_handler_size; 54 std::optional<u32> texture_handler_size;
55 u32 bound_buffer = 0;
56 VideoCommon::Shader::GraphicsInfo graphics_info;
57 VideoCommon::Shader::ComputeInfo compute_info;
56 VideoCommon::Shader::KeyMap keys; 58 VideoCommon::Shader::KeyMap keys;
57 VideoCommon::Shader::BoundSamplerMap bound_samplers; 59 VideoCommon::Shader::BoundSamplerMap bound_samplers;
58 VideoCommon::Shader::BindlessSamplerMap bindless_samplers; 60 VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
index 7126caf98..dc2d3dce3 100644
--- a/src/video_core/shader/registry.cpp
+++ b/src/video_core/shader/registry.cpp
@@ -6,21 +6,55 @@
6#include <tuple> 6#include <tuple>
7 7
8#include "common/common_types.h" 8#include "common/common_types.h"
9#include "video_core/engines/kepler_compute.h"
9#include "video_core/engines/maxwell_3d.h" 10#include "video_core/engines/maxwell_3d.h"
10#include "video_core/engines/shader_type.h" 11#include "video_core/engines/shader_type.h"
11#include "video_core/shader/registry.h" 12#include "video_core/shader/registry.h"
12 13
13namespace VideoCommon::Shader { 14namespace VideoCommon::Shader {
14 15
16using Tegra::Engines::ConstBufferEngineInterface;
15using Tegra::Engines::SamplerDescriptor; 17using Tegra::Engines::SamplerDescriptor;
18using Tegra::Engines::ShaderType;
16 19
17Registry::Registry(Tegra::Engines::ShaderType shader_stage, 20namespace {
18 VideoCore::GuestDriverProfile stored_guest_driver_profile) 21
19 : stage{shader_stage}, stored_guest_driver_profile{stored_guest_driver_profile} {} 22GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
23 if (shader_stage == ShaderType::Compute) {
24 return {};
25 }
26 auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine);
27
28 GraphicsInfo info;
29 info.primitive_topology = graphics.regs.draw.topology;
30 return info;
31}
32
33ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
34 if (shader_stage != ShaderType::Compute) {
35 return {};
36 }
37 auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine);
38 const auto& launch = compute.launch_description;
39
40 ComputeInfo info;
41 info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z};
42 info.local_memory_size_in_words = launch.local_pos_alloc;
43 info.shared_memory_size_in_words = launch.shared_alloc;
44 return info;
45}
46
47} // Anonymous namespace
48
49Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info)
50 : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile},
51 bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {}
20 52
21Registry::Registry(Tegra::Engines::ShaderType shader_stage, 53Registry::Registry(Tegra::Engines::ShaderType shader_stage,
22 Tegra::Engines::ConstBufferEngineInterface& engine) 54 Tegra::Engines::ConstBufferEngineInterface& engine)
23 : stage{shader_stage}, engine{&engine} {} 55 : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()},
56 graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo(
57 shader_stage, engine)} {}
24 58
25Registry::~Registry() = default; 59Registry::~Registry() = default;
26 60
@@ -67,18 +101,6 @@ std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler
67 return value; 101 return value;
68} 102}
69 103
70std::optional<u32> Registry::ObtainBoundBuffer() {
71 if (bound_buffer_saved) {
72 return bound_buffer;
73 }
74 if (!engine) {
75 return std::nullopt;
76 }
77 bound_buffer_saved = true;
78 bound_buffer = engine->GetBoundBuffer();
79 return bound_buffer;
80}
81
82void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { 104void Registry::InsertKey(u32 buffer, u32 offset, u32 value) {
83 keys.insert_or_assign({buffer, offset}, value); 105 keys.insert_or_assign({buffer, offset}, value);
84} 106}
@@ -91,11 +113,6 @@ void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor s
91 bindless_samplers.insert_or_assign({buffer, offset}, sampler); 113 bindless_samplers.insert_or_assign({buffer, offset}, sampler);
92} 114}
93 115
94void Registry::SetBoundBuffer(u32 buffer) {
95 bound_buffer_saved = true;
96 bound_buffer = buffer;
97}
98
99bool Registry::IsConsistent() const { 116bool Registry::IsConsistent() const {
100 if (!engine) { 117 if (!engine) {
101 return true; 118 return true;
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
index a5487e1d7..c1a04ea02 100644
--- a/src/video_core/shader/registry.h
+++ b/src/video_core/shader/registry.h
@@ -4,11 +4,16 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include <optional> 8#include <optional>
9#include <type_traits>
8#include <unordered_map> 10#include <unordered_map>
11#include <utility>
12
9#include "common/common_types.h" 13#include "common/common_types.h"
10#include "common/hash.h" 14#include "common/hash.h"
11#include "video_core/engines/const_buffer_engine_interface.h" 15#include "video_core/engines/const_buffer_engine_interface.h"
16#include "video_core/engines/maxwell_3d.h"
12#include "video_core/engines/shader_type.h" 17#include "video_core/engines/shader_type.h"
13#include "video_core/guest_driver.h" 18#include "video_core/guest_driver.h"
14 19
@@ -19,6 +24,25 @@ using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescripto
19using BindlessSamplerMap = 24using BindlessSamplerMap =
20 std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; 25 std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
21 26
27struct GraphicsInfo {
28 Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology primitive_topology{};
29};
30static_assert(std::is_trivially_copyable_v<GraphicsInfo>);
31
32struct ComputeInfo {
33 std::array<u32, 3> workgroup_size{};
34 u32 shared_memory_size_in_words = 0;
35 u32 local_memory_size_in_words = 0;
36};
37static_assert(std::is_trivially_copyable_v<ComputeInfo>);
38
39struct SerializedRegistryInfo {
40 VideoCore::GuestDriverProfile guest_driver_profile;
41 u32 bound_buffer = 0;
42 GraphicsInfo graphics;
43 ComputeInfo compute;
44};
45
22/** 46/**
23 * The Registry is a class use to interface the 3D and compute engines with the shader compiler. 47 * The Registry is a class use to interface the 3D and compute engines with the shader compiler.
24 * With it, the shader can obtain required data from GPU state and store it for disk shader 48 * With it, the shader can obtain required data from GPU state and store it for disk shader
@@ -26,8 +50,7 @@ using BindlessSamplerMap =
26 */ 50 */
27class Registry { 51class Registry {
28public: 52public:
29 explicit Registry(Tegra::Engines::ShaderType shader_stage, 53 explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
30 VideoCore::GuestDriverProfile stored_guest_driver_profile);
31 54
32 explicit Registry(Tegra::Engines::ShaderType shader_stage, 55 explicit Registry(Tegra::Engines::ShaderType shader_stage,
33 Tegra::Engines::ConstBufferEngineInterface& engine); 56 Tegra::Engines::ConstBufferEngineInterface& engine);
@@ -42,8 +65,6 @@ public:
42 65
43 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); 66 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
44 67
45 std::optional<u32> ObtainBoundBuffer();
46
47 /// Inserts a key. 68 /// Inserts a key.
48 void InsertKey(u32 buffer, u32 offset, u32 value); 69 void InsertKey(u32 buffer, u32 offset, u32 value);
49 70
@@ -53,9 +74,6 @@ public:
53 /// Inserts a bindless sampler key. 74 /// Inserts a bindless sampler key.
54 void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); 75 void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
55 76
56 /// Set the bound buffer for this registry.
57 void SetBoundBuffer(u32 buffer);
58
59 /// Checks keys and samplers against engine's current const buffers. 77 /// Checks keys and samplers against engine's current const buffers.
60 /// Returns true if they are the same value, false otherwise. 78 /// Returns true if they are the same value, false otherwise.
61 bool IsConsistent() const; 79 bool IsConsistent() const;
@@ -83,6 +101,18 @@ public:
83 return bound_buffer; 101 return bound_buffer;
84 } 102 }
85 103
104 /// Returns compute information from this shader
105 const GraphicsInfo& GetGraphicsInfo() const {
106 ASSERT(stage != Tegra::Engines::ShaderType::Compute);
107 return graphics_info;
108 }
109
110 /// Returns compute information from this shader
111 const ComputeInfo& GetComputeInfo() const {
112 ASSERT(stage == Tegra::Engines::ShaderType::Compute);
113 return compute_info;
114 }
115
86 /// Obtains access to the guest driver's profile. 116 /// Obtains access to the guest driver's profile.
87 VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { 117 VideoCore::GuestDriverProfile& AccessGuestDriverProfile() {
88 return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; 118 return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile;
@@ -95,8 +125,9 @@ private:
95 KeyMap keys; 125 KeyMap keys;
96 BoundSamplerMap bound_samplers; 126 BoundSamplerMap bound_samplers;
97 BindlessSamplerMap bindless_samplers; 127 BindlessSamplerMap bindless_samplers;
98 bool bound_buffer_saved{}; 128 u32 bound_buffer;
99 u32 bound_buffer{}; 129 GraphicsInfo graphics_info;
130 ComputeInfo compute_info;
100}; 131};
101 132
102} // namespace VideoCommon::Shader 133} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 831219841..10739b37d 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -81,14 +81,11 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
81 MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); 81 MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
82 return {tracked, track}; 82 return {tracked, track};
83 } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { 83 } else if (const auto operation = std::get_if<OperationNode>(&*offset)) {
84 const auto bound_buffer = registry.ObtainBoundBuffer(); 84 const u32 bound_buffer = registry.GetBoundBuffer();
85 if (!bound_buffer) { 85 if (bound_buffer != cbuf->GetIndex()) {
86 return {}; 86 return {};
87 } 87 }
88 if (*bound_buffer != cbuf->GetIndex()) { 88 const auto pair = DecoupleIndirectRead(*operation);
89 return {};
90 }
91 auto pair = DecoupleIndirectRead(*operation);
92 if (!pair) { 89 if (!pair) {
93 return {}; 90 return {};
94 } 91 }