summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/common/CMakeLists.txt6
-rw-r--r--src/common/hash.h11
-rw-r--r--src/video_core/CMakeLists.txt7
-rw-r--r--src/video_core/engines/const_buffer_engine_interface.h119
-rw-r--r--src/video_core/engines/kepler_compute.cpp20
-rw-r--r--src/video_core/engines/kepler_compute.h14
-rw-r--r--src/video_core/engines/maxwell_3d.cpp21
-rw-r--r--src/video_core/engines/maxwell_3d.h14
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp9
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp533
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h77
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp75
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h9
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp449
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h44
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp86
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h36
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp7
-rw-r--r--src/video_core/shader/ast.cpp4
-rw-r--r--src/video_core/shader/const_buffer_locker.cpp110
-rw-r--r--src/video_core/shader/const_buffer_locker.h80
-rw-r--r--src/video_core/shader/control_flow.cpp383
-rw-r--r--src/video_core/shader/control_flow.h69
-rw-r--r--src/video_core/shader/decode.cpp41
-rw-r--r--src/video_core/shader/decode/texture.cpp70
-rw-r--r--src/video_core/shader/expr.h21
-rw-r--r--src/video_core/shader/shader_ir.cpp7
-rw-r--r--src/video_core/shader/shader_ir.h24
28 files changed, 1482 insertions, 864 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 5b51fcafa..9c6f1c07c 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -74,10 +74,12 @@ add_custom_command(OUTPUT scm_rev.cpp
74 "${VIDEO_CORE}/shader/decode/xmad.cpp" 74 "${VIDEO_CORE}/shader/decode/xmad.cpp"
75 "${VIDEO_CORE}/shader/ast.cpp" 75 "${VIDEO_CORE}/shader/ast.cpp"
76 "${VIDEO_CORE}/shader/ast.h" 76 "${VIDEO_CORE}/shader/ast.h"
77 "${VIDEO_CORE}/shader/control_flow.cpp"
78 "${VIDEO_CORE}/shader/control_flow.h"
79 "${VIDEO_CORE}/shader/compiler_settings.cpp" 77 "${VIDEO_CORE}/shader/compiler_settings.cpp"
80 "${VIDEO_CORE}/shader/compiler_settings.h" 78 "${VIDEO_CORE}/shader/compiler_settings.h"
79 "${VIDEO_CORE}/shader/const_buffer_locker.cpp"
80 "${VIDEO_CORE}/shader/const_buffer_locker.h"
81 "${VIDEO_CORE}/shader/control_flow.cpp"
82 "${VIDEO_CORE}/shader/control_flow.h"
81 "${VIDEO_CORE}/shader/decode.cpp" 83 "${VIDEO_CORE}/shader/decode.cpp"
82 "${VIDEO_CORE}/shader/expr.cpp" 84 "${VIDEO_CORE}/shader/expr.cpp"
83 "${VIDEO_CORE}/shader/expr.h" 85 "${VIDEO_CORE}/shader/expr.h"
diff --git a/src/common/hash.h b/src/common/hash.h
index 40194d1ee..ebd4125e2 100644
--- a/src/common/hash.h
+++ b/src/common/hash.h
@@ -6,6 +6,8 @@
6 6
7#include <cstddef> 7#include <cstddef>
8#include <cstring> 8#include <cstring>
9#include <utility>
10#include <boost/functional/hash.hpp>
9#include "common/cityhash.h" 11#include "common/cityhash.h"
10#include "common/common_types.h" 12#include "common/common_types.h"
11 13
@@ -68,4 +70,13 @@ struct HashableStruct {
68 } 70 }
69}; 71};
70 72
73struct PairHash {
74 template <class T1, class T2>
75 std::size_t operator()(const std::pair<T1, T2>& pair) const noexcept {
76 std::size_t seed = std::hash<T1>()(pair.first);
77 boost::hash_combine(seed, std::hash<T2>()(pair.second));
78 return seed;
79 }
80};
81
71} // namespace Common 82} // namespace Common
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index eaa694ff8..cb6eda1b8 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -6,6 +6,7 @@ add_library(video_core STATIC
6 dma_pusher.h 6 dma_pusher.h
7 debug_utils/debug_utils.cpp 7 debug_utils/debug_utils.cpp
8 debug_utils/debug_utils.h 8 debug_utils/debug_utils.h
9 engines/const_buffer_engine_interface.h
9 engines/const_buffer_info.h 10 engines/const_buffer_info.h
10 engines/engine_upload.cpp 11 engines/engine_upload.cpp
11 engines/engine_upload.h 12 engines/engine_upload.h
@@ -107,10 +108,12 @@ add_library(video_core STATIC
107 shader/decode/other.cpp 108 shader/decode/other.cpp
108 shader/ast.cpp 109 shader/ast.cpp
109 shader/ast.h 110 shader/ast.h
110 shader/control_flow.cpp
111 shader/control_flow.h
112 shader/compiler_settings.cpp 111 shader/compiler_settings.cpp
113 shader/compiler_settings.h 112 shader/compiler_settings.h
113 shader/const_buffer_locker.cpp
114 shader/const_buffer_locker.h
115 shader/control_flow.cpp
116 shader/control_flow.h
114 shader/decode.cpp 117 shader/decode.cpp
115 shader/expr.cpp 118 shader/expr.cpp
116 shader/expr.h 119 shader/expr.h
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
new file mode 100644
index 000000000..ac27b6cbe
--- /dev/null
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -0,0 +1,119 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <type_traits>
8#include "common/bit_field.h"
9#include "common/common_types.h"
10#include "video_core/engines/shader_bytecode.h"
11#include "video_core/textures/texture.h"
12
13namespace Tegra::Engines {
14
15enum class ShaderType : u32 {
16 Vertex = 0,
17 TesselationControl = 1,
18 TesselationEval = 2,
19 Geometry = 3,
20 Fragment = 4,
21 Compute = 5,
22};
23
24struct SamplerDescriptor {
25 union {
26 BitField<0, 20, Tegra::Shader::TextureType> texture_type;
27 BitField<20, 1, u32> is_array;
28 BitField<21, 1, u32> is_buffer;
29 BitField<22, 1, u32> is_shadow;
30 u32 raw{};
31 };
32
33 bool operator==(const SamplerDescriptor& rhs) const noexcept {
34 return raw == rhs.raw;
35 }
36
37 bool operator!=(const SamplerDescriptor& rhs) const noexcept {
38 return !operator==(rhs);
39 }
40
41 static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) {
42 SamplerDescriptor result;
43 switch (tic_texture_type) {
44 case Tegra::Texture::TextureType::Texture1D:
45 result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
46 result.is_array.Assign(0);
47 result.is_buffer.Assign(0);
48 result.is_shadow.Assign(0);
49 return result;
50 case Tegra::Texture::TextureType::Texture2D:
51 result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
52 result.is_array.Assign(0);
53 result.is_buffer.Assign(0);
54 result.is_shadow.Assign(0);
55 return result;
56 case Tegra::Texture::TextureType::Texture3D:
57 result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D);
58 result.is_array.Assign(0);
59 result.is_buffer.Assign(0);
60 result.is_shadow.Assign(0);
61 return result;
62 case Tegra::Texture::TextureType::TextureCubemap:
63 result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube);
64 result.is_array.Assign(0);
65 result.is_buffer.Assign(0);
66 result.is_shadow.Assign(0);
67 return result;
68 case Tegra::Texture::TextureType::Texture1DArray:
69 result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
70 result.is_array.Assign(1);
71 result.is_buffer.Assign(0);
72 result.is_shadow.Assign(0);
73 return result;
74 case Tegra::Texture::TextureType::Texture2DArray:
75 result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
76 result.is_array.Assign(1);
77 result.is_buffer.Assign(0);
78 result.is_shadow.Assign(0);
79 return result;
80 case Tegra::Texture::TextureType::Texture1DBuffer:
81 result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
82 result.is_array.Assign(0);
83 result.is_buffer.Assign(1);
84 result.is_shadow.Assign(0);
85 return result;
86 case Tegra::Texture::TextureType::Texture2DNoMipmap:
87 result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
88 result.is_array.Assign(0);
89 result.is_buffer.Assign(0);
90 result.is_shadow.Assign(0);
91 return result;
92 case Tegra::Texture::TextureType::TextureCubeArray:
93 result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube);
94 result.is_array.Assign(1);
95 result.is_buffer.Assign(0);
96 result.is_shadow.Assign(0);
97 return result;
98 default:
99 result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
100 result.is_array.Assign(0);
101 result.is_buffer.Assign(0);
102 result.is_shadow.Assign(0);
103 return result;
104 }
105 }
106};
107static_assert(std::is_trivially_copyable_v<SamplerDescriptor>);
108
109class ConstBufferEngineInterface {
110public:
111 virtual ~ConstBufferEngineInterface() = default;
112 virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0;
113 virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
114 virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
115 u64 offset) const = 0;
116 virtual u32 GetBoundBuffer() const = 0;
117};
118
119} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 63d449135..91adef360 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -70,13 +70,31 @@ Texture::FullTextureInfo KeplerCompute::GetTextureInfo(const Texture::TextureHan
70 GetTSCEntry(tex_handle.tsc_id)}; 70 GetTSCEntry(tex_handle.tsc_id)};
71} 71}
72 72
73u32 KeplerCompute::AccessConstBuffer32(u64 const_buffer, u64 offset) const { 73u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
74 ASSERT(stage == ShaderType::Compute);
74 const auto& buffer = launch_description.const_buffer_config[const_buffer]; 75 const auto& buffer = launch_description.const_buffer_config[const_buffer];
75 u32 result; 76 u32 result;
76 std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); 77 std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32));
77 return result; 78 return result;
78} 79}
79 80
81SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const {
82 return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
83}
84
85SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
86 u64 offset) const {
87 ASSERT(stage == ShaderType::Compute);
88 const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
89 const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
90
91 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
92 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle, offset);
93 SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value());
94 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
95 return result;
96}
97
80void KeplerCompute::ProcessLaunch() { 98void KeplerCompute::ProcessLaunch() {
81 const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); 99 const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
82 memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, 100 memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 90cf650d2..8e7182727 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -10,6 +10,7 @@
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_funcs.h" 11#include "common/common_funcs.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/engines/const_buffer_engine_interface.h"
13#include "video_core/engines/engine_upload.h" 14#include "video_core/engines/engine_upload.h"
14#include "video_core/gpu.h" 15#include "video_core/gpu.h"
15#include "video_core/textures/texture.h" 16#include "video_core/textures/texture.h"
@@ -37,7 +38,7 @@ namespace Tegra::Engines {
37#define KEPLER_COMPUTE_REG_INDEX(field_name) \ 38#define KEPLER_COMPUTE_REG_INDEX(field_name) \
38 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) 39 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
39 40
40class KeplerCompute final { 41class KeplerCompute final : public ConstBufferEngineInterface {
41public: 42public:
42 explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 43 explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
43 MemoryManager& memory_manager); 44 MemoryManager& memory_manager);
@@ -201,7 +202,16 @@ public:
201 Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, 202 Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle,
202 std::size_t offset) const; 203 std::size_t offset) const;
203 204
204 u32 AccessConstBuffer32(u64 const_buffer, u64 offset) const; 205 u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
206
207 SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
208
209 SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
210 u64 offset) const override;
211
212 u32 GetBoundBuffer() const override {
213 return regs.tex_cb_index;
214 }
205 215
206private: 216private:
207 Core::System& system; 217 Core::System& system;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 59976943a..558955451 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -847,7 +847,8 @@ void Maxwell3D::ProcessClearBuffers() {
847 rasterizer.Clear(); 847 rasterizer.Clear();
848} 848}
849 849
850u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const { 850u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
851 ASSERT(stage != ShaderType::Compute);
851 const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; 852 const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
852 const auto& buffer = shader_stage.const_buffers[const_buffer]; 853 const auto& buffer = shader_stage.const_buffers[const_buffer];
853 u32 result; 854 u32 result;
@@ -855,4 +856,22 @@ u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u6
855 return result; 856 return result;
856} 857}
857 858
859SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
860 return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
861}
862
863SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
864 u64 offset) const {
865 ASSERT(stage != ShaderType::Compute);
866 const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
867 const auto& tex_info_buffer = shader.const_buffers[const_buffer];
868 const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
869
870 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
871 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle, offset);
872 SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value());
873 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
874 return result;
875}
876
858} // namespace Tegra::Engines 877} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index e3f1047d5..fa846a621 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -15,6 +15,7 @@
15#include "common/common_funcs.h" 15#include "common/common_funcs.h"
16#include "common/common_types.h" 16#include "common/common_types.h"
17#include "common/math_util.h" 17#include "common/math_util.h"
18#include "video_core/engines/const_buffer_engine_interface.h"
18#include "video_core/engines/const_buffer_info.h" 19#include "video_core/engines/const_buffer_info.h"
19#include "video_core/engines/engine_upload.h" 20#include "video_core/engines/engine_upload.h"
20#include "video_core/gpu.h" 21#include "video_core/gpu.h"
@@ -44,7 +45,7 @@ namespace Tegra::Engines {
44#define MAXWELL3D_REG_INDEX(field_name) \ 45#define MAXWELL3D_REG_INDEX(field_name) \
45 (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) 46 (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
46 47
47class Maxwell3D final { 48class Maxwell3D final : public ConstBufferEngineInterface {
48public: 49public:
49 explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 50 explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
50 MemoryManager& memory_manager); 51 MemoryManager& memory_manager);
@@ -1257,7 +1258,16 @@ public:
1257 /// Returns the texture information for a specific texture in a specific shader stage. 1258 /// Returns the texture information for a specific texture in a specific shader stage.
1258 Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; 1259 Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
1259 1260
1260 u32 AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const; 1261 u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
1262
1263 SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
1264
1265 SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
1266 u64 offset) const override;
1267
1268 u32 GetBoundBuffer() const override {
1269 return regs.tex_cb_index;
1270 }
1261 1271
1262 /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than 1272 /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
1263 /// we've seen used. 1273 /// we've seen used.
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index cbcf81414..9431d64ac 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -975,7 +975,8 @@ TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stag
975 } 975 }
976 const auto cbuf = entry.GetBindlessCBuf(); 976 const auto cbuf = entry.GetBindlessCBuf();
977 Tegra::Texture::TextureHandle tex_handle; 977 Tegra::Texture::TextureHandle tex_handle;
978 tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second); 978 Tegra::Engines::ShaderType shader_type = static_cast<Tegra::Engines::ShaderType>(stage);
979 tex_handle.raw = maxwell3d.AccessConstBuffer32(shader_type, cbuf.first, cbuf.second);
979 return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); 980 return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset());
980 }(); 981 }();
981 982
@@ -1005,7 +1006,8 @@ TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel)
1005 } 1006 }
1006 const auto cbuf = entry.GetBindlessCBuf(); 1007 const auto cbuf = entry.GetBindlessCBuf();
1007 Tegra::Texture::TextureHandle tex_handle; 1008 Tegra::Texture::TextureHandle tex_handle;
1008 tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second); 1009 tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute,
1010 cbuf.first, cbuf.second);
1009 return compute.GetTextureInfo(tex_handle, entry.GetOffset()); 1011 return compute.GetTextureInfo(tex_handle, entry.GetOffset());
1010 }(); 1012 }();
1011 1013
@@ -1050,7 +1052,8 @@ void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
1050 } 1052 }
1051 const auto cbuf = entry.GetBindlessCBuf(); 1053 const auto cbuf = entry.GetBindlessCBuf();
1052 Tegra::Texture::TextureHandle tex_handle; 1054 Tegra::Texture::TextureHandle tex_handle;
1053 tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second); 1055 tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute,
1056 cbuf.first, cbuf.second);
1054 return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic; 1057 return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic;
1055 }(); 1058 }();
1056 SetupImage(bindpoint, tic, entry); 1059 SetupImage(bindpoint, tic, entry);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 42ca3b1bd..f1b89165d 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -3,13 +3,16 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <mutex> 5#include <mutex>
6#include <optional>
7#include <string>
6#include <thread> 8#include <thread>
9#include <unordered_set>
7#include <boost/functional/hash.hpp> 10#include <boost/functional/hash.hpp>
8#include "common/assert.h" 11#include "common/assert.h"
9#include "common/hash.h"
10#include "common/scope_exit.h" 12#include "common/scope_exit.h"
11#include "core/core.h" 13#include "core/core.h"
12#include "core/frontend/emu_window.h" 14#include "core/frontend/emu_window.h"
15#include "video_core/engines/kepler_compute.h"
13#include "video_core/engines/maxwell_3d.h" 16#include "video_core/engines/maxwell_3d.h"
14#include "video_core/memory_manager.h" 17#include "video_core/memory_manager.h"
15#include "video_core/renderer_opengl/gl_rasterizer.h" 18#include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -21,18 +24,20 @@
21 24
22namespace OpenGL { 25namespace OpenGL {
23 26
27using Tegra::Engines::ShaderType;
28using VideoCommon::Shader::ConstBufferLocker;
24using VideoCommon::Shader::ProgramCode; 29using VideoCommon::Shader::ProgramCode;
30using VideoCommon::Shader::ShaderIR;
31
32namespace {
25 33
26// One UBO is always reserved for emulation values on staged shaders 34// One UBO is always reserved for emulation values on staged shaders
27constexpr u32 STAGE_RESERVED_UBOS = 1; 35constexpr u32 STAGE_RESERVED_UBOS = 1;
28 36
29struct UnspecializedShader { 37constexpr u32 STAGE_MAIN_OFFSET = 10;
30 std::string code; 38constexpr u32 KERNEL_MAIN_OFFSET = 0;
31 GLShader::ShaderEntries entries;
32 ProgramType program_type;
33};
34 39
35namespace { 40constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};
36 41
37/// Gets the address for the specified shader stage program 42/// Gets the address for the specified shader stage program
38GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { 43GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
@@ -41,6 +46,39 @@ GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program)
41 return gpu.regs.code_address.CodeAddress() + shader_config.offset; 46 return gpu.regs.code_address.CodeAddress() + shader_config.offset;
42} 47}
43 48
49/// Gets if the current instruction offset is a scheduler instruction
50constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
51 // Sched instructions appear once every 4 instructions.
52 constexpr std::size_t SchedPeriod = 4;
53 const std::size_t absolute_offset = offset - main_offset;
54 return (absolute_offset % SchedPeriod) == 0;
55}
56
57/// Calculates the size of a program stream
58std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
59 constexpr std::size_t start_offset = 10;
60 // This is the encoded version of BRA that jumps to itself. All Nvidia
61 // shaders end with one.
62 constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
63 constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
64 std::size_t offset = start_offset;
65 while (offset < program.size()) {
66 const u64 instruction = program[offset];
67 if (!IsSchedInstruction(offset, start_offset)) {
68 if ((instruction & mask) == self_jumping_branch) {
69 // End on Maxwell's "nop" instruction
70 break;
71 }
72 if (instruction == 0) {
73 break;
74 }
75 }
76 offset++;
77 }
78 // The last instruction is included in the program size
79 return std::min(offset + 1, program.size());
80}
81
44/// Gets the shader program code from memory for the specified address 82/// Gets the shader program code from memory for the specified address
45ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, 83ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
46 const u8* host_ptr) { 84 const u8* host_ptr) {
@@ -51,6 +89,7 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g
51 }); 89 });
52 memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), 90 memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
53 program_code.size() * sizeof(u64)); 91 program_code.size() * sizeof(u64));
92 program_code.resize(CalculateProgramSize(program_code));
54 return program_code; 93 return program_code;
55} 94}
56 95
@@ -71,14 +110,6 @@ constexpr GLenum GetShaderType(ProgramType program_type) {
71 } 110 }
72} 111}
73 112
74/// Gets if the current instruction offset is a scheduler instruction
75constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
76 // Sched instructions appear once every 4 instructions.
77 constexpr std::size_t SchedPeriod = 4;
78 const std::size_t absolute_offset = offset - main_offset;
79 return (absolute_offset % SchedPeriod) == 0;
80}
81
82/// Describes primitive behavior on geometry shaders 113/// Describes primitive behavior on geometry shaders
83constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) { 114constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
84 switch (primitive_mode) { 115 switch (primitive_mode) {
@@ -121,110 +152,142 @@ ProgramType GetProgramType(Maxwell::ShaderProgram program) {
121 return {}; 152 return {};
122} 153}
123 154
124/// Calculates the size of a program stream
125std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
126 constexpr std::size_t start_offset = 10;
127 // This is the encoded version of BRA that jumps to itself. All Nvidia
128 // shaders end with one.
129 constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
130 constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
131 std::size_t offset = start_offset;
132 std::size_t size = start_offset * sizeof(u64);
133 while (offset < program.size()) {
134 const u64 instruction = program[offset];
135 if (!IsSchedInstruction(offset, start_offset)) {
136 if ((instruction & mask) == self_jumping_branch) {
137 // End on Maxwell's "nop" instruction
138 break;
139 }
140 if (instruction == 0) {
141 break;
142 }
143 }
144 size += sizeof(u64);
145 offset++;
146 }
147 // The last instruction is included in the program size
148 return std::min(size + sizeof(u64), program.size() * sizeof(u64));
149}
150
151/// Hashes one (or two) program streams 155/// Hashes one (or two) program streams
152u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, 156u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code,
153 const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { 157 const ProgramCode& code_b) {
154 if (size_a == 0) { 158 u64 unique_identifier = boost::hash_value(code);
155 size_a = CalculateProgramSize(code); 159 if (program_type == ProgramType::VertexA) {
156 } 160 // VertexA programs include two programs
157 u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); 161 boost::hash_combine(unique_identifier, boost::hash_value(code_b));
158 if (program_type != ProgramType::VertexA) {
159 return unique_identifier;
160 }
161 // VertexA programs include two programs
162
163 std::size_t seed = 0;
164 boost::hash_combine(seed, unique_identifier);
165
166 if (size_b == 0) {
167 size_b = CalculateProgramSize(code_b);
168 } 162 }
169 const u64 identifier_b = 163 return unique_identifier;
170 Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b);
171 boost::hash_combine(seed, identifier_b);
172 return static_cast<u64>(seed);
173} 164}
174 165
175/// Creates an unspecialized program from code streams 166/// Creates an unspecialized program from code streams
176GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type, 167std::string GenerateGLSL(const Device& device, ProgramType program_type, const ShaderIR& ir,
177 ProgramCode program_code, ProgramCode program_code_b) { 168 const std::optional<ShaderIR>& ir_b) {
178 GLShader::ShaderSetup setup(program_code);
179 setup.program.size_a = CalculateProgramSize(program_code);
180 setup.program.size_b = 0;
181 if (program_type == ProgramType::VertexA) {
182 // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
183 // Conventional HW does not support this, so we combine VertexA and VertexB into one
184 // stage here.
185 setup.SetProgramB(program_code_b);
186 setup.program.size_b = CalculateProgramSize(program_code_b);
187 }
188 setup.program.unique_identifier = GetUniqueIdentifier(
189 program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b);
190
191 switch (program_type) { 169 switch (program_type) {
192 case ProgramType::VertexA: 170 case ProgramType::VertexA:
193 case ProgramType::VertexB: 171 case ProgramType::VertexB:
194 return GLShader::GenerateVertexShader(device, setup); 172 return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr);
195 case ProgramType::Geometry: 173 case ProgramType::Geometry:
196 return GLShader::GenerateGeometryShader(device, setup); 174 return GLShader::GenerateGeometryShader(device, ir);
197 case ProgramType::Fragment: 175 case ProgramType::Fragment:
198 return GLShader::GenerateFragmentShader(device, setup); 176 return GLShader::GenerateFragmentShader(device, ir);
199 case ProgramType::Compute: 177 case ProgramType::Compute:
200 return GLShader::GenerateComputeShader(device, setup); 178 return GLShader::GenerateComputeShader(device, ir);
201 default: 179 default:
202 UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type)); 180 UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type));
203 return {}; 181 return {};
204 } 182 }
205} 183}
206 184
207CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, 185constexpr const char* GetProgramTypeName(ProgramType program_type) {
208 ProgramType program_type, const ProgramVariant& variant, 186 switch (program_type) {
209 bool hint_retrievable = false) { 187 case ProgramType::VertexA:
188 case ProgramType::VertexB:
189 return "VS";
190 case ProgramType::TessellationControl:
191 return "TCS";
192 case ProgramType::TessellationEval:
193 return "TES";
194 case ProgramType::Geometry:
195 return "GS";
196 case ProgramType::Fragment:
197 return "FS";
198 case ProgramType::Compute:
199 return "CS";
200 }
201 return "UNK";
202}
203
204Tegra::Engines::ShaderType GetEnginesShaderType(ProgramType program_type) {
205 switch (program_type) {
206 case ProgramType::VertexA:
207 case ProgramType::VertexB:
208 return Tegra::Engines::ShaderType::Vertex;
209 case ProgramType::TessellationControl:
210 return Tegra::Engines::ShaderType::TesselationControl;
211 case ProgramType::TessellationEval:
212 return Tegra::Engines::ShaderType::TesselationEval;
213 case ProgramType::Geometry:
214 return Tegra::Engines::ShaderType::Geometry;
215 case ProgramType::Fragment:
216 return Tegra::Engines::ShaderType::Fragment;
217 case ProgramType::Compute:
218 return Tegra::Engines::ShaderType::Compute;
219 }
220 UNREACHABLE();
221 return {};
222}
223
224std::string GetShaderId(u64 unique_identifier, ProgramType program_type) {
225 return fmt::format("{}{:016X}", GetProgramTypeName(program_type), unique_identifier);
226}
227
228Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface(
229 Core::System& system, ProgramType program_type) {
230 if (program_type == ProgramType::Compute) {
231 return system.GPU().KeplerCompute();
232 } else {
233 return system.GPU().Maxwell3D();
234 }
235}
236
237std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ProgramType program_type) {
238 return std::make_unique<ConstBufferLocker>(GetEnginesShaderType(program_type),
239 GetConstBufferEngineInterface(system, program_type));
240}
241
242void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
243 for (const auto& key : usage.keys) {
244 const auto [buffer, offset] = key.first;
245 locker.InsertKey(buffer, offset, key.second);
246 }
247 for (const auto& [offset, sampler] : usage.bound_samplers) {
248 locker.InsertBoundSampler(offset, sampler);
249 }
250 for (const auto& [key, sampler] : usage.bindless_samplers) {
251 const auto [buffer, offset] = key;
252 locker.InsertBindlessSampler(buffer, offset, sampler);
253 }
254}
255
256CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramType program_type,
257 const ProgramCode& program_code, const ProgramCode& program_code_b,
258 const ProgramVariant& variant, ConstBufferLocker& locker,
259 bool hint_retrievable = false) {
260 LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, program_type));
261
262 const bool is_compute = program_type == ProgramType::Compute;
263 const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
264 const ShaderIR ir(program_code, main_offset, COMPILER_SETTINGS, locker);
265 std::optional<ShaderIR> ir_b;
266 if (!program_code_b.empty()) {
267 ir_b.emplace(program_code_b, main_offset, COMPILER_SETTINGS, locker);
268 }
269 const auto entries = GLShader::GetEntries(ir);
270
210 auto base_bindings{variant.base_bindings}; 271 auto base_bindings{variant.base_bindings};
211 const auto primitive_mode{variant.primitive_mode}; 272 const auto primitive_mode{variant.primitive_mode};
212 const auto texture_buffer_usage{variant.texture_buffer_usage}; 273 const auto texture_buffer_usage{variant.texture_buffer_usage};
213 274
214 std::string source = R"(#version 430 core 275 std::string source = fmt::format(R"(// {}
276#version 430 core
215#extension GL_ARB_separate_shader_objects : enable 277#extension GL_ARB_separate_shader_objects : enable
216#extension GL_ARB_shader_viewport_layer_array : enable 278#extension GL_ARB_shader_viewport_layer_array : enable
217#extension GL_EXT_shader_image_load_formatted : enable 279#extension GL_EXT_shader_image_load_formatted : enable
218#extension GL_NV_gpu_shader5 : enable 280#extension GL_NV_gpu_shader5 : enable
219#extension GL_NV_shader_thread_group : enable 281#extension GL_NV_shader_thread_group : enable
220#extension GL_NV_shader_thread_shuffle : enable 282#extension GL_NV_shader_thread_shuffle : enable
221)"; 283)",
222 if (program_type == ProgramType::Compute) { 284 GetShaderId(unique_identifier, program_type));
285 if (is_compute) {
223 source += "#extension GL_ARB_compute_variable_group_size : require\n"; 286 source += "#extension GL_ARB_compute_variable_group_size : require\n";
224 } 287 }
225 source += '\n'; 288 source += '\n';
226 289
227 if (program_type != ProgramType::Compute) { 290 if (!is_compute) {
228 source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); 291 source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
229 } 292 }
230 293
@@ -268,7 +331,7 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
268 } 331 }
269 332
270 source += '\n'; 333 source += '\n';
271 source += code; 334 source += GenerateGLSL(device, program_type, ir, ir_b);
272 335
273 OGLShader shader; 336 OGLShader shader;
274 shader.Create(source.c_str(), GetShaderType(program_type)); 337 shader.Create(source.c_str(), GetShaderType(program_type));
@@ -278,85 +341,97 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
278 return program; 341 return program;
279} 342}
280 343
281std::set<GLenum> GetSupportedFormats() { 344std::unordered_set<GLenum> GetSupportedFormats() {
282 std::set<GLenum> supported_formats;
283
284 GLint num_formats{}; 345 GLint num_formats{};
285 glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); 346 glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
286 347
287 std::vector<GLint> formats(num_formats); 348 std::vector<GLint> formats(num_formats);
288 glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); 349 glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
289 350
290 for (const GLint format : formats) 351 std::unordered_set<GLenum> supported_formats;
352 for (const GLint format : formats) {
291 supported_formats.insert(static_cast<GLenum>(format)); 353 supported_formats.insert(static_cast<GLenum>(format));
354 }
292 return supported_formats; 355 return supported_formats;
293} 356}
294 357
295} // Anonymous namespace 358} // Anonymous namespace
296 359
297CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, 360CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type,
298 GLShader::ProgramResult result) 361 GLShader::ShaderEntries entries, ProgramCode program_code,
299 : RasterizerCacheObject{params.host_ptr}, cpu_addr{params.cpu_addr}, 362 ProgramCode program_code_b)
300 unique_identifier{params.unique_identifier}, program_type{program_type}, 363 : RasterizerCacheObject{params.host_ptr}, system{params.system},
301 disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs}, 364 disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr},
302 entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {} 365 unique_identifier{params.unique_identifier}, program_type{program_type}, entries{entries},
366 program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {
367 if (!params.precompiled_variants) {
368 return;
369 }
370 for (const auto& pair : *params.precompiled_variants) {
371 auto locker = MakeLocker(system, program_type);
372 const auto& usage = pair->first;
373 FillLocker(*locker, usage);
374
375 std::unique_ptr<LockerVariant>* locker_variant = nullptr;
376 const auto it =
377 std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) {
378 return variant->locker->HasEqualKeys(*locker);
379 });
380 if (it == locker_variants.end()) {
381 locker_variant = &locker_variants.emplace_back();
382 *locker_variant = std::make_unique<LockerVariant>();
383 locker_variant->get()->locker = std::move(locker);
384 } else {
385 locker_variant = &*it;
386 }
387 locker_variant->get()->programs.emplace(usage.variant, pair->second);
388 }
389}
303 390
304Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, 391Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
305 Maxwell::ShaderProgram program_type, 392 Maxwell::ShaderProgram program_type,
306 ProgramCode&& program_code, 393 ProgramCode program_code, ProgramCode program_code_b) {
307 ProgramCode&& program_code_b) {
308 const auto code_size{CalculateProgramSize(program_code)};
309 const auto code_size_b{CalculateProgramSize(program_code_b)};
310 auto result{
311 CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)};
312 if (result.first.empty()) {
313 // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
314 return {};
315 }
316
317 params.disk_cache.SaveRaw(ShaderDiskCacheRaw( 394 params.disk_cache.SaveRaw(ShaderDiskCacheRaw(
318 params.unique_identifier, GetProgramType(program_type), 395 params.unique_identifier, GetProgramType(program_type), program_code, program_code_b));
319 static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)), 396
320 std::move(program_code), std::move(program_code_b))); 397 ConstBufferLocker locker(GetEnginesShaderType(GetProgramType(program_type)));
321 398 const ShaderIR ir(program_code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker);
399 // TODO(Rodrigo): Handle VertexA shaders
400 // std::optional<ShaderIR> ir_b;
401 // if (!program_code_b.empty()) {
402 // ir_b.emplace(program_code_b, STAGE_MAIN_OFFSET);
403 // }
322 return std::shared_ptr<CachedShader>( 404 return std::shared_ptr<CachedShader>(
323 new CachedShader(params, GetProgramType(program_type), std::move(result))); 405 new CachedShader(params, GetProgramType(program_type), GLShader::GetEntries(ir),
406 std::move(program_code), std::move(program_code_b)));
324} 407}
325 408
326Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, 409Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
327 Maxwell::ShaderProgram program_type, 410 params.disk_cache.SaveRaw(
328 GLShader::ProgramResult result) { 411 ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, code));
329 return std::shared_ptr<CachedShader>(
330 new CachedShader(params, GetProgramType(program_type), std::move(result)));
331}
332
333Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) {
334 auto result{CreateProgram(params.device, ProgramType::Compute, code, {})};
335
336 const auto code_size{CalculateProgramSize(code)};
337 params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute,
338 static_cast<u32>(code_size / sizeof(u64)), 0,
339 std::move(code), {}));
340 412
341 return std::shared_ptr<CachedShader>( 413 ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute);
342 new CachedShader(params, ProgramType::Compute, std::move(result))); 414 const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker);
415 return std::shared_ptr<CachedShader>(new CachedShader(
416 params, ProgramType::Compute, GLShader::GetEntries(ir), std::move(code), {}));
343} 417}
344 418
345Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params, 419Shader CachedShader::CreateFromCache(const ShaderParameters& params,
346 GLShader::ProgramResult result) { 420 const UnspecializedShader& unspecialized) {
347 return std::shared_ptr<CachedShader>( 421 return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.program_type,
348 new CachedShader(params, ProgramType::Compute, std::move(result))); 422 unspecialized.entries, unspecialized.code,
423 unspecialized.code_b));
349} 424}
350 425
351std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { 426std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
352 const auto [entry, is_cache_miss] = programs.try_emplace(variant); 427 UpdateVariant();
428
429 const auto [entry, is_cache_miss] = curr_variant->programs.try_emplace(variant);
353 auto& program = entry->second; 430 auto& program = entry->second;
354 if (is_cache_miss) { 431 if (is_cache_miss) {
355 program = TryLoadProgram(variant); 432 program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b,
356 if (!program) { 433 variant, *curr_variant->locker);
357 program = SpecializeShader(code, entries, program_type, variant); 434 disk_cache.SaveUsage(GetUsage(variant, *curr_variant->locker));
358 disk_cache.SaveUsage(GetUsage(variant));
359 }
360 435
361 LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); 436 LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
362 } 437 }
@@ -372,18 +447,33 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar
372 return {program->handle, base_bindings}; 447 return {program->handle, base_bindings};
373} 448}
374 449
375CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const { 450void CachedShader::UpdateVariant() {
376 const auto found = precompiled_programs.find(GetUsage(variant)); 451 if (curr_variant && !curr_variant->locker->IsConsistent()) {
377 if (found == precompiled_programs.end()) { 452 curr_variant = nullptr;
378 return {}; 453 }
454 if (!curr_variant) {
455 for (auto& variant : locker_variants) {
456 if (variant->locker->IsConsistent()) {
457 curr_variant = variant.get();
458 }
459 }
460 }
461 if (!curr_variant) {
462 auto& new_variant = locker_variants.emplace_back();
463 new_variant = std::make_unique<LockerVariant>();
464 new_variant->locker = MakeLocker(system, program_type);
465 curr_variant = new_variant.get();
379 } 466 }
380 return found->second;
381} 467}
382 468
383ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const { 469ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant,
470 const ConstBufferLocker& locker) const {
384 ShaderDiskCacheUsage usage; 471 ShaderDiskCacheUsage usage;
385 usage.unique_identifier = unique_identifier; 472 usage.unique_identifier = unique_identifier;
386 usage.variant = variant; 473 usage.variant = variant;
474 usage.keys = locker.GetKeys();
475 usage.bound_samplers = locker.GetBoundSamplers();
476 usage.bindless_samplers = locker.GetBindlessSamplers();
387 return usage; 477 return usage;
388} 478}
389 479
@@ -399,18 +489,15 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
399 return; 489 return;
400 } 490 }
401 const auto [raws, shader_usages] = *transferable; 491 const auto [raws, shader_usages] = *transferable;
402 492 if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) {
403 auto [decompiled, dumps] = disk_cache.LoadPrecompiled();
404
405 const auto supported_formats{GetSupportedFormats()};
406 const auto unspecialized_shaders{
407 GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)};
408 if (stop_loading) {
409 return; 493 return;
410 } 494 }
411 495
412 // Track if precompiled cache was altered during loading to know if we have to serialize the 496 const auto dumps = disk_cache.LoadPrecompiled();
413 // virtual precompiled cache file back to the hard drive 497 const auto supported_formats = GetSupportedFormats();
498
499 // Track if precompiled cache was altered during loading to know if we have to
500 // serialize the virtual precompiled cache file back to the hard drive
414 bool precompiled_cache_altered = false; 501 bool precompiled_cache_altered = false;
415 502
416 // Inform the frontend about shader build initialization 503 // Inform the frontend about shader build initialization
@@ -433,9 +520,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
433 return; 520 return;
434 } 521 }
435 const auto& usage{shader_usages[i]}; 522 const auto& usage{shader_usages[i]};
436 LOG_INFO(Render_OpenGL, "Building shader {:016x} (index {} of {})",
437 usage.unique_identifier, i, shader_usages.size());
438
439 const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; 523 const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)};
440 const auto dump{dumps.find(usage)}; 524 const auto dump{dumps.find(usage)};
441 525
@@ -449,21 +533,28 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
449 } 533 }
450 } 534 }
451 if (!shader) { 535 if (!shader) {
452 shader = SpecializeShader(unspecialized.code, unspecialized.entries, 536 auto locker{MakeLocker(system, unspecialized.program_type)};
453 unspecialized.program_type, usage.variant, true); 537 FillLocker(*locker, usage);
538 shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type,
539 unspecialized.code, unspecialized.code_b, usage.variant,
540 *locker, true);
454 } 541 }
455 542
456 std::scoped_lock lock(mutex); 543 std::scoped_lock lock{mutex};
457 if (callback) { 544 if (callback) {
458 callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, 545 callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
459 shader_usages.size()); 546 shader_usages.size());
460 } 547 }
461 548
462 precompiled_programs.emplace(usage, std::move(shader)); 549 precompiled_programs.emplace(usage, std::move(shader));
550
551 // TODO(Rodrigo): Is there a better way to do this?
552 precompiled_variants[usage.unique_identifier].push_back(
553 precompiled_programs.find(usage));
463 } 554 }
464 }; 555 };
465 556
466 const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1)}; 557 const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)};
467 const std::size_t bucket_size{shader_usages.size() / num_workers}; 558 const std::size_t bucket_size{shader_usages.size() / num_workers};
468 std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); 559 std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
469 std::vector<std::thread> threads(num_workers); 560 std::vector<std::thread> threads(num_workers);
@@ -483,7 +574,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
483 if (compilation_failed) { 574 if (compilation_failed) {
484 // Invalidate the precompiled cache if a shader dumped shader was rejected 575 // Invalidate the precompiled cache if a shader dumped shader was rejected
485 disk_cache.InvalidatePrecompiled(); 576 disk_cache.InvalidatePrecompiled();
486 dumps.clear();
487 precompiled_cache_altered = true; 577 precompiled_cache_altered = true;
488 return; 578 return;
489 } 579 }
@@ -491,8 +581,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
491 return; 581 return;
492 } 582 }
493 583
494 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before 584 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
495 // precompiling them 585 // before precompiling them
496 586
497 for (std::size_t i = 0; i < shader_usages.size(); ++i) { 587 for (std::size_t i = 0; i < shader_usages.size(); ++i) {
498 const auto& usage{shader_usages[i]}; 588 const auto& usage{shader_usages[i]};
@@ -508,9 +598,13 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
508 } 598 }
509} 599}
510 600
511CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( 601const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const {
512 const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats) { 602 const auto it = precompiled_variants.find(unique_identifier);
603 return it == precompiled_variants.end() ? nullptr : &it->second;
604}
513 605
606CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
607 const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) {
514 if (supported_formats.find(dump.binary_format) == supported_formats.end()) { 608 if (supported_formats.find(dump.binary_format) == supported_formats.end()) {
515 LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); 609 LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing");
516 return {}; 610 return {};
@@ -532,56 +626,52 @@ CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
532 return shader; 626 return shader;
533} 627}
534 628
535std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecializedShaders( 629bool ShaderCacheOpenGL::GenerateUnspecializedShaders(
536 const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, 630 const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
537 const std::vector<ShaderDiskCacheRaw>& raws, 631 const std::vector<ShaderDiskCacheRaw>& raws) {
538 const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) {
539 std::unordered_map<u64, UnspecializedShader> unspecialized;
540
541 if (callback) { 632 if (callback) {
542 callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); 633 callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
543 } 634 }
544 635
545 for (std::size_t i = 0; i < raws.size(); ++i) { 636 for (std::size_t i = 0; i < raws.size(); ++i) {
546 if (stop_loading) { 637 if (stop_loading) {
547 return {}; 638 return false;
548 } 639 }
549 const auto& raw{raws[i]}; 640 const auto& raw{raws[i]};
550 const u64 unique_identifier{raw.GetUniqueIdentifier()}; 641 const u64 unique_identifier{raw.GetUniqueIdentifier()};
551 const u64 calculated_hash{ 642 const u64 calculated_hash{
552 GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())}; 643 GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())};
553 if (unique_identifier != calculated_hash) { 644 if (unique_identifier != calculated_hash) {
554 LOG_ERROR( 645 LOG_ERROR(Render_OpenGL,
555 Render_OpenGL, 646 "Invalid hash in entry={:016x} (obtained hash={:016x}) - "
556 "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing shader cache", 647 "removing shader cache",
557 raw.GetUniqueIdentifier(), calculated_hash); 648 raw.GetUniqueIdentifier(), calculated_hash);
558 disk_cache.InvalidateTransferable(); 649 disk_cache.InvalidateTransferable();
559 return {}; 650 return false;
560 } 651 }
561 652
562 GLShader::ProgramResult result; 653 const u32 main_offset =
563 if (const auto it = decompiled.find(unique_identifier); it != decompiled.end()) { 654 raw.GetProgramType() == ProgramType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
564 // If it's stored in the precompiled file, avoid decompiling it here 655 ConstBufferLocker locker(GetEnginesShaderType(raw.GetProgramType()));
565 const auto& stored_decompiled{it->second}; 656 const ShaderIR ir(raw.GetProgramCode(), main_offset, COMPILER_SETTINGS, locker);
566 result = {stored_decompiled.code, stored_decompiled.entries}; 657 // TODO(Rodrigo): Handle VertexA shaders
567 } else { 658 // std::optional<ShaderIR> ir_b;
568 // Otherwise decompile the shader at boot and save the result to the decompiled file 659 // if (raw.HasProgramA()) {
569 result = CreateProgram(device, raw.GetProgramType(), raw.GetProgramCode(), 660 // ir_b.emplace(raw.GetProgramCodeB(), main_offset);
570 raw.GetProgramCodeB()); 661 // }
571 disk_cache.SaveDecompiled(unique_identifier, result.first, result.second); 662
572 } 663 UnspecializedShader unspecialized;
573 664 unspecialized.entries = GLShader::GetEntries(ir);
574 precompiled_shaders.insert({unique_identifier, result}); 665 unspecialized.program_type = raw.GetProgramType();
575 666 unspecialized.code = raw.GetProgramCode();
576 unspecialized.insert( 667 unspecialized.code_b = raw.GetProgramCodeB();
577 {raw.GetUniqueIdentifier(), 668 unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized);
578 {std::move(result.first), std::move(result.second), raw.GetProgramType()}});
579 669
580 if (callback) { 670 if (callback) {
581 callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); 671 callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size());
582 } 672 }
583 } 673 }
584 return unspecialized; 674 return true;
585} 675}
586 676
587Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { 677Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
@@ -590,37 +680,35 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
590 } 680 }
591 681
592 auto& memory_manager{system.GPU().MemoryManager()}; 682 auto& memory_manager{system.GPU().MemoryManager()};
593 const GPUVAddr program_addr{GetShaderAddress(system, program)}; 683 const GPUVAddr address{GetShaderAddress(system, program)};
594 684
595 // Look up shader in the cache based on address 685 // Look up shader in the cache based on address
596 const auto host_ptr{memory_manager.GetPointer(program_addr)}; 686 const auto host_ptr{memory_manager.GetPointer(address)};
597 Shader shader{TryGet(host_ptr)}; 687 Shader shader{TryGet(host_ptr)};
598 if (shader) { 688 if (shader) {
599 return last_shaders[static_cast<std::size_t>(program)] = shader; 689 return last_shaders[static_cast<std::size_t>(program)] = shader;
600 } 690 }
601 691
602 // No shader found - create a new one 692 // No shader found - create a new one
603 ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; 693 ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)};
604 ProgramCode program_code_b; 694 ProgramCode code_b;
605 const bool is_program_a{program == Maxwell::ShaderProgram::VertexA}; 695 if (program == Maxwell::ShaderProgram::VertexA) {
606 if (is_program_a) { 696 const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
607 const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; 697 code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b));
608 program_code_b = GetShaderCode(memory_manager, program_addr_b, 698 }
609 memory_manager.GetPointer(program_addr_b)); 699
610 } 700 const auto unique_identifier = GetUniqueIdentifier(GetProgramType(program), code, code_b);
611 701 const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
612 const auto unique_identifier = 702 const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
613 GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b); 703 const ShaderParameters params{system, disk_cache, precompiled_variants, device,
614 const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; 704 cpu_addr, host_ptr, unique_identifier};
615 const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, 705
616 host_ptr, unique_identifier}; 706 const auto found = unspecialized_shaders.find(unique_identifier);
617 707 if (found == unspecialized_shaders.end()) {
618 const auto found = precompiled_shaders.find(unique_identifier); 708 shader = CachedShader::CreateStageFromMemory(params, program, std::move(code),
619 if (found == precompiled_shaders.end()) { 709 std::move(code_b));
620 shader = CachedShader::CreateStageFromMemory(params, program, std::move(program_code),
621 std::move(program_code_b));
622 } else { 710 } else {
623 shader = CachedShader::CreateStageFromCache(params, program, found->second); 711 shader = CachedShader::CreateFromCache(params, found->second);
624 } 712 }
625 Register(shader); 713 Register(shader);
626 714
@@ -638,15 +726,16 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
638 // No kernel found - create a new one 726 // No kernel found - create a new one
639 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; 727 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
640 const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; 728 const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})};
729 const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
641 const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; 730 const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
642 const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, 731 const ShaderParameters params{system, disk_cache, precompiled_variants, device,
643 host_ptr, unique_identifier}; 732 cpu_addr, host_ptr, unique_identifier};
644 733
645 const auto found = precompiled_shaders.find(unique_identifier); 734 const auto found = unspecialized_shaders.find(unique_identifier);
646 if (found == precompiled_shaders.end()) { 735 if (found == unspecialized_shaders.end()) {
647 kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); 736 kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
648 } else { 737 } else {
649 kernel = CachedShader::CreateKernelFromCache(params, found->second); 738 kernel = CachedShader::CreateFromCache(params, found->second);
650 } 739 }
651 740
652 Register(kernel); 741 Register(kernel);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index de195cc5d..6bd7c9cf1 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -8,9 +8,10 @@
8#include <atomic> 8#include <atomic>
9#include <bitset> 9#include <bitset>
10#include <memory> 10#include <memory>
11#include <set> 11#include <string>
12#include <tuple> 12#include <tuple>
13#include <unordered_map> 13#include <unordered_map>
14#include <unordered_set>
14#include <vector> 15#include <vector>
15 16
16#include <glad/glad.h> 17#include <glad/glad.h>
@@ -20,6 +21,8 @@
20#include "video_core/renderer_opengl/gl_resource_manager.h" 21#include "video_core/renderer_opengl/gl_resource_manager.h"
21#include "video_core/renderer_opengl/gl_shader_decompiler.h" 22#include "video_core/renderer_opengl/gl_shader_decompiler.h"
22#include "video_core/renderer_opengl/gl_shader_disk_cache.h" 23#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
24#include "video_core/shader/const_buffer_locker.h"
25#include "video_core/shader/shader_ir.h"
23 26
24namespace Core { 27namespace Core {
25class System; 28class System;
@@ -40,11 +43,19 @@ using Shader = std::shared_ptr<CachedShader>;
40using CachedProgram = std::shared_ptr<OGLProgram>; 43using CachedProgram = std::shared_ptr<OGLProgram>;
41using Maxwell = Tegra::Engines::Maxwell3D::Regs; 44using Maxwell = Tegra::Engines::Maxwell3D::Regs;
42using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; 45using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>;
43using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>; 46using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>;
47
48struct UnspecializedShader {
49 GLShader::ShaderEntries entries;
50 ProgramType program_type;
51 ProgramCode code;
52 ProgramCode code_b;
53};
44 54
45struct ShaderParameters { 55struct ShaderParameters {
56 Core::System& system;
46 ShaderDiskCacheOpenGL& disk_cache; 57 ShaderDiskCacheOpenGL& disk_cache;
47 const PrecompiledPrograms& precompiled_programs; 58 const PrecompiledVariants* precompiled_variants;
48 const Device& device; 59 const Device& device;
49 VAddr cpu_addr; 60 VAddr cpu_addr;
50 u8* host_ptr; 61 u8* host_ptr;
@@ -55,23 +66,18 @@ class CachedShader final : public RasterizerCacheObject {
55public: 66public:
56 static Shader CreateStageFromMemory(const ShaderParameters& params, 67 static Shader CreateStageFromMemory(const ShaderParameters& params,
57 Maxwell::ShaderProgram program_type, 68 Maxwell::ShaderProgram program_type,
58 ProgramCode&& program_code, ProgramCode&& program_code_b); 69 ProgramCode program_code, ProgramCode program_code_b);
59 70 static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
60 static Shader CreateStageFromCache(const ShaderParameters& params,
61 Maxwell::ShaderProgram program_type,
62 GLShader::ProgramResult result);
63 71
64 static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code); 72 static Shader CreateFromCache(const ShaderParameters& params,
65 73 const UnspecializedShader& unspecialized);
66 static Shader CreateKernelFromCache(const ShaderParameters& params,
67 GLShader::ProgramResult result);
68 74
69 VAddr GetCpuAddr() const override { 75 VAddr GetCpuAddr() const override {
70 return cpu_addr; 76 return cpu_addr;
71 } 77 }
72 78
73 std::size_t GetSizeInBytes() const override { 79 std::size_t GetSizeInBytes() const override {
74 return shader_length; 80 return program_code.size() * sizeof(u64);
75 } 81 }
76 82
77 /// Gets the shader entries for the shader 83 /// Gets the shader entries for the shader
@@ -83,24 +89,36 @@ public:
83 std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); 89 std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant);
84 90
85private: 91private:
92 struct LockerVariant {
93 std::unique_ptr<VideoCommon::Shader::ConstBufferLocker> locker;
94 std::unordered_map<ProgramVariant, CachedProgram> programs;
95 };
96
86 explicit CachedShader(const ShaderParameters& params, ProgramType program_type, 97 explicit CachedShader(const ShaderParameters& params, ProgramType program_type,
87 GLShader::ProgramResult result); 98 GLShader::ShaderEntries entries, ProgramCode program_code,
99 ProgramCode program_code_b);
88 100
89 CachedProgram TryLoadProgram(const ProgramVariant& variant) const; 101 void UpdateVariant();
90 102
91 ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; 103 ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant,
104 const VideoCommon::Shader::ConstBufferLocker& locker) const;
105
106 Core::System& system;
107 ShaderDiskCacheOpenGL& disk_cache;
108 const Device& device;
92 109
93 VAddr cpu_addr{}; 110 VAddr cpu_addr{};
111
94 u64 unique_identifier{}; 112 u64 unique_identifier{};
95 ProgramType program_type{}; 113 ProgramType program_type{};
96 ShaderDiskCacheOpenGL& disk_cache;
97 const PrecompiledPrograms& precompiled_programs;
98 114
99 GLShader::ShaderEntries entries; 115 GLShader::ShaderEntries entries;
100 std::string code;
101 std::size_t shader_length{};
102 116
103 std::unordered_map<ProgramVariant, CachedProgram> programs; 117 ProgramCode program_code;
118 ProgramCode program_code_b;
119
120 LockerVariant* curr_variant = nullptr;
121 std::vector<std::unique_ptr<LockerVariant>> locker_variants;
104}; 122};
105 123
106class ShaderCacheOpenGL final : public RasterizerCache<Shader> { 124class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
@@ -123,21 +141,26 @@ protected:
123 void FlushObjectInner(const Shader& object) override {} 141 void FlushObjectInner(const Shader& object) override {}
124 142
125private: 143private:
126 std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders( 144 bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading,
127 const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, 145 const VideoCore::DiskResourceLoadCallback& callback,
128 const std::vector<ShaderDiskCacheRaw>& raws, 146 const std::vector<ShaderDiskCacheRaw>& raws);
129 const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled);
130 147
131 CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, 148 CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
132 const std::set<GLenum>& supported_formats); 149 const std::unordered_set<GLenum>& supported_formats);
150
151 const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const;
133 152
134 Core::System& system; 153 Core::System& system;
135 Core::Frontend::EmuWindow& emu_window; 154 Core::Frontend::EmuWindow& emu_window;
136 const Device& device; 155 const Device& device;
156
137 ShaderDiskCacheOpenGL disk_cache; 157 ShaderDiskCacheOpenGL disk_cache;
138 158
139 PrecompiledShaders precompiled_shaders;
140 PrecompiledPrograms precompiled_programs; 159 PrecompiledPrograms precompiled_programs;
160 std::unordered_map<u64, PrecompiledVariants> precompiled_variants;
161
162 std::unordered_map<u64, UnspecializedShader> unspecialized_shaders;
163
141 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; 164 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
142}; 165};
143 166
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index baec66ff0..030550c53 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -415,27 +415,6 @@ public:
415 return code.GetResult(); 415 return code.GetResult();
416 } 416 }
417 417
418 ShaderEntries GetShaderEntries() const {
419 ShaderEntries entries;
420 for (const auto& cbuf : ir.GetConstantBuffers()) {
421 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
422 cbuf.first);
423 }
424 for (const auto& sampler : ir.GetSamplers()) {
425 entries.samplers.emplace_back(sampler);
426 }
427 for (const auto& [offset, image] : ir.GetImages()) {
428 entries.images.emplace_back(image);
429 }
430 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
431 entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset,
432 usage.is_read, usage.is_written);
433 }
434 entries.clip_distances = ir.GetClipDistances();
435 entries.shader_length = ir.GetLength();
436 return entries;
437 }
438
439private: 418private:
440 friend class ASTDecompiler; 419 friend class ASTDecompiler;
441 friend class ExprDecompiler; 420 friend class ExprDecompiler;
@@ -2338,6 +2317,11 @@ public:
2338 inner += expr.value ? "true" : "false"; 2317 inner += expr.value ? "true" : "false";
2339 } 2318 }
2340 2319
2320 void operator()(VideoCommon::Shader::ExprGprEqual& expr) {
2321 inner +=
2322 "( ftou(" + decomp.GetRegister(expr.gpr) + ") == " + std::to_string(expr.value) + ')';
2323 }
2324
2341 const std::string& GetResult() const { 2325 const std::string& GetResult() const {
2342 return inner; 2326 return inner;
2343 } 2327 }
@@ -2476,25 +2460,46 @@ void GLSLDecompiler::DecompileAST() {
2476 2460
2477} // Anonymous namespace 2461} // Anonymous namespace
2478 2462
2463ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) {
2464 ShaderEntries entries;
2465 for (const auto& cbuf : ir.GetConstantBuffers()) {
2466 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
2467 cbuf.first);
2468 }
2469 for (const auto& sampler : ir.GetSamplers()) {
2470 entries.samplers.emplace_back(sampler);
2471 }
2472 for (const auto& [offset, image] : ir.GetImages()) {
2473 entries.images.emplace_back(image);
2474 }
2475 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
2476 entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read,
2477 usage.is_written);
2478 }
2479 entries.clip_distances = ir.GetClipDistances();
2480 entries.shader_length = ir.GetLength();
2481 return entries;
2482}
2483
2479std::string GetCommonDeclarations() { 2484std::string GetCommonDeclarations() {
2480 return fmt::format( 2485 return R"(#define ftoi floatBitsToInt
2481 "#define ftoi floatBitsToInt\n" 2486#define ftou floatBitsToUint
2482 "#define ftou floatBitsToUint\n" 2487#define itof intBitsToFloat
2483 "#define itof intBitsToFloat\n" 2488#define utof uintBitsToFloat
2484 "#define utof uintBitsToFloat\n\n" 2489
2485 "bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n" 2490bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {
2486 " bvec2 is_nan1 = isnan(pair1);\n" 2491 bvec2 is_nan1 = isnan(pair1);
2487 " bvec2 is_nan2 = isnan(pair2);\n" 2492 bvec2 is_nan2 = isnan(pair2);
2488 " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " 2493 return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
2489 "is_nan2.y);\n" 2494}
2490 "}}\n\n"); 2495)";
2491} 2496}
2492 2497
2493ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, 2498std::string Decompile(const Device& device, const ShaderIR& ir, ProgramType stage,
2494 const std::string& suffix) { 2499 const std::string& suffix) {
2495 GLSLDecompiler decompiler(device, ir, stage, suffix); 2500 GLSLDecompiler decompiler(device, ir, stage, suffix);
2496 decompiler.Decompile(); 2501 decompiler.Decompile();
2497 return {decompiler.GetResult(), decompiler.GetShaderEntries()}; 2502 return decompiler.GetResult();
2498} 2503}
2499 2504
2500} // namespace OpenGL::GLShader 2505} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index e538dc001..fead2a51e 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -34,10 +34,7 @@ enum class ProgramType : u32 {
34 34
35namespace OpenGL::GLShader { 35namespace OpenGL::GLShader {
36 36
37struct ShaderEntries;
38
39using Maxwell = Tegra::Engines::Maxwell3D::Regs; 37using Maxwell = Tegra::Engines::Maxwell3D::Regs;
40using ProgramResult = std::pair<std::string, ShaderEntries>;
41using SamplerEntry = VideoCommon::Shader::Sampler; 38using SamplerEntry = VideoCommon::Shader::Sampler;
42using ImageEntry = VideoCommon::Shader::Image; 39using ImageEntry = VideoCommon::Shader::Image;
43 40
@@ -93,9 +90,11 @@ struct ShaderEntries {
93 std::size_t shader_length{}; 90 std::size_t shader_length{};
94}; 91};
95 92
93ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir);
94
96std::string GetCommonDeclarations(); 95std::string GetCommonDeclarations();
97 96
98ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, 97std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
99 ProgramType stage, const std::string& suffix); 98 ProgramType stage, const std::string& suffix);
100 99
101} // namespace OpenGL::GLShader 100} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 74cc33476..184a565e6 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -22,6 +22,29 @@
22 22
23namespace OpenGL { 23namespace OpenGL {
24 24
25using VideoCommon::Shader::BindlessSamplerMap;
26using VideoCommon::Shader::BoundSamplerMap;
27using VideoCommon::Shader::KeyMap;
28
29namespace {
30
31struct ConstBufferKey {
32 u32 cbuf;
33 u32 offset;
34 u32 value;
35};
36
37struct BoundSamplerKey {
38 u32 offset;
39 Tegra::Engines::SamplerDescriptor sampler;
40};
41
42struct BindlessSamplerKey {
43 u32 cbuf;
44 u32 offset;
45 Tegra::Engines::SamplerDescriptor sampler;
46};
47
25using ShaderCacheVersionHash = std::array<u8, 64>; 48using ShaderCacheVersionHash = std::array<u8, 64>;
26 49
27enum class TransferableEntryKind : u32 { 50enum class TransferableEntryKind : u32 {
@@ -29,18 +52,10 @@ enum class TransferableEntryKind : u32 {
29 Usage, 52 Usage,
30}; 53};
31 54
32enum class PrecompiledEntryKind : u32 { 55constexpr u32 NativeVersion = 5;
33 Decompiled,
34 Dump,
35};
36
37constexpr u32 NativeVersion = 4;
38 56
39// Making sure sizes doesn't change by accident 57// Making sure sizes doesn't change by accident
40static_assert(sizeof(BaseBindings) == 16); 58static_assert(sizeof(BaseBindings) == 16);
41static_assert(sizeof(ShaderDiskCacheUsage) == 40);
42
43namespace {
44 59
45ShaderCacheVersionHash GetShaderCacheVersionHash() { 60ShaderCacheVersionHash GetShaderCacheVersionHash() {
46 ShaderCacheVersionHash hash{}; 61 ShaderCacheVersionHash hash{};
@@ -49,13 +64,11 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
49 return hash; 64 return hash;
50} 65}
51 66
52} // namespace 67} // Anonymous namespace
53 68
54ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, 69ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
55 u32 program_code_size, u32 program_code_size_b,
56 ProgramCode program_code, ProgramCode program_code_b) 70 ProgramCode program_code, ProgramCode program_code_b)
57 : unique_identifier{unique_identifier}, program_type{program_type}, 71 : unique_identifier{unique_identifier}, program_type{program_type},
58 program_code_size{program_code_size}, program_code_size_b{program_code_size_b},
59 program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {} 72 program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {}
60 73
61ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; 74ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default;
@@ -90,15 +103,16 @@ bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
90bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { 103bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
91 if (file.WriteObject(unique_identifier) != 1 || 104 if (file.WriteObject(unique_identifier) != 1 ||
92 file.WriteObject(static_cast<u32>(program_type)) != 1 || 105 file.WriteObject(static_cast<u32>(program_type)) != 1 ||
93 file.WriteObject(program_code_size) != 1 || file.WriteObject(program_code_size_b) != 1) { 106 file.WriteObject(static_cast<u32>(program_code.size())) != 1 ||
107 file.WriteObject(static_cast<u32>(program_code_b.size())) != 1) {
94 return false; 108 return false;
95 } 109 }
96 110
97 if (file.WriteArray(program_code.data(), program_code_size) != program_code_size) 111 if (file.WriteArray(program_code.data(), program_code.size()) != program_code.size())
98 return false; 112 return false;
99 113
100 if (HasProgramA() && 114 if (HasProgramA() &&
101 file.WriteArray(program_code_b.data(), program_code_size_b) != program_code_size_b) { 115 file.WriteArray(program_code_b.data(), program_code_b.size()) != program_code_b.size()) {
102 return false; 116 return false;
103 } 117 }
104 return true; 118 return true;
@@ -127,13 +141,13 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
127 u32 version{}; 141 u32 version{};
128 if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { 142 if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
129 LOG_ERROR(Render_OpenGL, 143 LOG_ERROR(Render_OpenGL,
130 "Failed to get transferable cache version for title id={} - skipping", 144 "Failed to get transferable cache version for title id={}, skipping",
131 GetTitleID()); 145 GetTitleID());
132 return {}; 146 return {};
133 } 147 }
134 148
135 if (version < NativeVersion) { 149 if (version < NativeVersion) {
136 LOG_INFO(Render_OpenGL, "Transferable shader cache is old - removing"); 150 LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing");
137 file.Close(); 151 file.Close();
138 InvalidateTransferable(); 152 InvalidateTransferable();
139 is_usable = true; 153 is_usable = true;
@@ -141,17 +155,18 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
141 } 155 }
142 if (version > NativeVersion) { 156 if (version > NativeVersion) {
143 LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " 157 LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
144 "of the emulator - skipping"); 158 "of the emulator, skipping");
145 return {}; 159 return {};
146 } 160 }
147 161
148 // Version is valid, load the shaders 162 // Version is valid, load the shaders
163 constexpr const char error_loading[] = "Failed to load transferable raw entry, skipping";
149 std::vector<ShaderDiskCacheRaw> raws; 164 std::vector<ShaderDiskCacheRaw> raws;
150 std::vector<ShaderDiskCacheUsage> usages; 165 std::vector<ShaderDiskCacheUsage> usages;
151 while (file.Tell() < file.GetSize()) { 166 while (file.Tell() < file.GetSize()) {
152 TransferableEntryKind kind{}; 167 TransferableEntryKind kind{};
153 if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) { 168 if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
154 LOG_ERROR(Render_OpenGL, "Failed to read transferable file - skipping"); 169 LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping");
155 return {}; 170 return {};
156 } 171 }
157 172
@@ -159,7 +174,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
159 case TransferableEntryKind::Raw: { 174 case TransferableEntryKind::Raw: {
160 ShaderDiskCacheRaw entry; 175 ShaderDiskCacheRaw entry;
161 if (!entry.Load(file)) { 176 if (!entry.Load(file)) {
162 LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry - skipping"); 177 LOG_ERROR(Render_OpenGL, error_loading);
163 return {}; 178 return {};
164 } 179 }
165 transferable.insert({entry.GetUniqueIdentifier(), {}}); 180 transferable.insert({entry.GetUniqueIdentifier(), {}});
@@ -167,16 +182,45 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
167 break; 182 break;
168 } 183 }
169 case TransferableEntryKind::Usage: { 184 case TransferableEntryKind::Usage: {
170 ShaderDiskCacheUsage usage{}; 185 ShaderDiskCacheUsage usage;
171 if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage)) { 186
172 LOG_ERROR(Render_OpenGL, "Failed to load transferable usage entry - skipping"); 187 u32 num_keys{};
188 u32 num_bound_samplers{};
189 u32 num_bindless_samplers{};
190 if (file.ReadArray(&usage.unique_identifier, 1) != 1 ||
191 file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
192 file.ReadArray(&num_bound_samplers, 1) != 1 ||
193 file.ReadArray(&num_bindless_samplers, 1) != 1) {
194 LOG_ERROR(Render_OpenGL, error_loading);
173 return {}; 195 return {};
174 } 196 }
197
198 std::vector<ConstBufferKey> keys(num_keys);
199 std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
200 std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
201 if (file.ReadArray(keys.data(), keys.size()) != keys.size() ||
202 file.ReadArray(bound_samplers.data(), bound_samplers.size()) !=
203 bound_samplers.size() ||
204 file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) !=
205 bindless_samplers.size()) {
206 LOG_ERROR(Render_OpenGL, error_loading);
207 return {};
208 }
209 for (const auto& key : keys) {
210 usage.keys.insert({{key.cbuf, key.offset}, key.value});
211 }
212 for (const auto& key : bound_samplers) {
213 usage.bound_samplers.emplace(key.offset, key.sampler);
214 }
215 for (const auto& key : bindless_samplers) {
216 usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
217 }
218
175 usages.push_back(std::move(usage)); 219 usages.push_back(std::move(usage));
176 break; 220 break;
177 } 221 }
178 default: 222 default:
179 LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={} - skipping", 223 LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping",
180 static_cast<u32>(kind)); 224 static_cast<u32>(kind));
181 return {}; 225 return {};
182 } 226 }
@@ -186,13 +230,14 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
186 return {{std::move(raws), std::move(usages)}}; 230 return {{std::move(raws), std::move(usages)}};
187} 231}
188 232
189std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap> 233std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>
190ShaderDiskCacheOpenGL::LoadPrecompiled() { 234ShaderDiskCacheOpenGL::LoadPrecompiled() {
191 if (!is_usable) { 235 if (!is_usable) {
192 return {}; 236 return {};
193 } 237 }
194 238
195 FileUtil::IOFile file(GetPrecompiledPath(), "rb"); 239 std::string path = GetPrecompiledPath();
240 FileUtil::IOFile file(path, "rb");
196 if (!file.IsOpen()) { 241 if (!file.IsOpen()) {
197 LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}", 242 LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}",
198 GetTitleID()); 243 GetTitleID());
@@ -202,7 +247,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() {
202 const auto result = LoadPrecompiledFile(file); 247 const auto result = LoadPrecompiledFile(file);
203 if (!result) { 248 if (!result) {
204 LOG_INFO(Render_OpenGL, 249 LOG_INFO(Render_OpenGL,
205 "Failed to load precompiled cache for game with title id={} - removing", 250 "Failed to load precompiled cache for game with title id={}, removing",
206 GetTitleID()); 251 GetTitleID());
207 file.Close(); 252 file.Close();
208 InvalidatePrecompiled(); 253 InvalidatePrecompiled();
@@ -211,7 +256,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() {
211 return *result; 256 return *result;
212} 257}
213 258
214std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>> 259std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
215ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { 260ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
216 // Read compressed file from disk and decompress to virtual precompiled cache file 261 // Read compressed file from disk and decompress to virtual precompiled cache file
217 std::vector<u8> compressed(file.GetSize()); 262 std::vector<u8> compressed(file.GetSize());
@@ -231,238 +276,56 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
231 return {}; 276 return {};
232 } 277 }
233 278
234 std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
235 ShaderDumpsMap dumps; 279 ShaderDumpsMap dumps;
236 while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { 280 while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
237 PrecompiledEntryKind kind{}; 281 u32 num_keys{};
238 if (!LoadObjectFromPrecompiled(kind)) { 282 u32 num_bound_samplers{};
283 u32 num_bindless_samplers{};
284 ShaderDiskCacheUsage usage;
285 if (!LoadObjectFromPrecompiled(usage.unique_identifier) ||
286 !LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) ||
287 !LoadObjectFromPrecompiled(num_bound_samplers) ||
288 !LoadObjectFromPrecompiled(num_bindless_samplers)) {
239 return {}; 289 return {};
240 } 290 }
241 291 std::vector<ConstBufferKey> keys(num_keys);
242 switch (kind) { 292 std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
243 case PrecompiledEntryKind::Decompiled: { 293 std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
244 u64 unique_identifier{}; 294 if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) ||
245 if (!LoadObjectFromPrecompiled(unique_identifier)) { 295 !LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) !=
246 return {}; 296 bound_samplers.size() ||
247 } 297 !LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size()) !=
248 298 bindless_samplers.size()) {
249 auto entry = LoadDecompiledEntry();
250 if (!entry) {
251 return {};
252 }
253 decompiled.insert({unique_identifier, std::move(*entry)});
254 break;
255 }
256 case PrecompiledEntryKind::Dump: {
257 ShaderDiskCacheUsage usage;
258 if (!LoadObjectFromPrecompiled(usage)) {
259 return {};
260 }
261
262 ShaderDiskCacheDump dump;
263 if (!LoadObjectFromPrecompiled(dump.binary_format)) {
264 return {};
265 }
266
267 u32 binary_length{};
268 if (!LoadObjectFromPrecompiled(binary_length)) {
269 return {};
270 }
271
272 dump.binary.resize(binary_length);
273 if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) {
274 return {};
275 }
276
277 dumps.insert({usage, dump});
278 break;
279 }
280 default:
281 return {}; 299 return {};
282 } 300 }
283 } 301 for (const auto& key : keys) {
284 return {{decompiled, dumps}}; 302 usage.keys.insert({{key.cbuf, key.offset}, key.value});
285}
286
287std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry() {
288 u32 code_size{};
289 if (!LoadObjectFromPrecompiled(code_size)) {
290 return {};
291 }
292
293 std::string code(code_size, '\0');
294 if (!LoadArrayFromPrecompiled(code.data(), code.size())) {
295 return {};
296 }
297
298 ShaderDiskCacheDecompiled entry;
299 entry.code = std::move(code);
300
301 u32 const_buffers_count{};
302 if (!LoadObjectFromPrecompiled(const_buffers_count)) {
303 return {};
304 }
305
306 for (u32 i = 0; i < const_buffers_count; ++i) {
307 u32 max_offset{};
308 u32 index{};
309 bool is_indirect{};
310 if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) ||
311 !LoadObjectFromPrecompiled(is_indirect)) {
312 return {};
313 } 303 }
314 entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index); 304 for (const auto& key : bound_samplers) {
315 } 305 usage.bound_samplers.emplace(key.offset, key.sampler);
316
317 u32 samplers_count{};
318 if (!LoadObjectFromPrecompiled(samplers_count)) {
319 return {};
320 }
321
322 for (u32 i = 0; i < samplers_count; ++i) {
323 u64 offset{};
324 u64 index{};
325 u32 type{};
326 bool is_array{};
327 bool is_shadow{};
328 bool is_bindless{};
329 if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
330 !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) ||
331 !LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) {
332 return {};
333 } 306 }
334 entry.entries.samplers.emplace_back( 307 for (const auto& key : bindless_samplers) {
335 static_cast<std::size_t>(offset), static_cast<std::size_t>(index), 308 usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
336 static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless);
337 }
338
339 u32 images_count{};
340 if (!LoadObjectFromPrecompiled(images_count)) {
341 return {};
342 }
343 for (u32 i = 0; i < images_count; ++i) {
344 u64 offset{};
345 u64 index{};
346 u32 type{};
347 u8 is_bindless{};
348 u8 is_written{};
349 u8 is_read{};
350 u8 is_atomic{};
351 if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
352 !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) ||
353 !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) ||
354 !LoadObjectFromPrecompiled(is_atomic)) {
355 return {};
356 } 309 }
357 entry.entries.images.emplace_back(
358 static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
359 static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0,
360 is_read != 0, is_atomic != 0);
361 }
362 310
363 u32 global_memory_count{}; 311 ShaderDiskCacheDump dump;
364 if (!LoadObjectFromPrecompiled(global_memory_count)) { 312 if (!LoadObjectFromPrecompiled(dump.binary_format)) {
365 return {};
366 }
367 for (u32 i = 0; i < global_memory_count; ++i) {
368 u32 cbuf_index{};
369 u32 cbuf_offset{};
370 bool is_read{};
371 bool is_written{};
372 if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) ||
373 !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) {
374 return {}; 313 return {};
375 } 314 }
376 entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read,
377 is_written);
378 }
379 315
380 for (auto& clip_distance : entry.entries.clip_distances) { 316 u32 binary_length{};
381 if (!LoadObjectFromPrecompiled(clip_distance)) { 317 if (!LoadObjectFromPrecompiled(binary_length)) {
382 return {}; 318 return {};
383 } 319 }
384 }
385
386 u64 shader_length{};
387 if (!LoadObjectFromPrecompiled(shader_length)) {
388 return {};
389 }
390 entry.entries.shader_length = static_cast<std::size_t>(shader_length);
391
392 return entry;
393}
394
395bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std::string& code,
396 const GLShader::ShaderEntries& entries) {
397 if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Decompiled)) ||
398 !SaveObjectToPrecompiled(unique_identifier) ||
399 !SaveObjectToPrecompiled(static_cast<u32>(code.size())) ||
400 !SaveArrayToPrecompiled(code.data(), code.size())) {
401 return false;
402 }
403
404 if (!SaveObjectToPrecompiled(static_cast<u32>(entries.const_buffers.size()))) {
405 return false;
406 }
407 for (const auto& cbuf : entries.const_buffers) {
408 if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) ||
409 !SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) ||
410 !SaveObjectToPrecompiled(cbuf.IsIndirect())) {
411 return false;
412 }
413 }
414
415 if (!SaveObjectToPrecompiled(static_cast<u32>(entries.samplers.size()))) {
416 return false;
417 }
418 for (const auto& sampler : entries.samplers) {
419 if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) ||
420 !SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) ||
421 !SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) ||
422 !SaveObjectToPrecompiled(sampler.IsArray()) ||
423 !SaveObjectToPrecompiled(sampler.IsShadow()) ||
424 !SaveObjectToPrecompiled(sampler.IsBindless())) {
425 return false;
426 }
427 }
428
429 if (!SaveObjectToPrecompiled(static_cast<u32>(entries.images.size()))) {
430 return false;
431 }
432 for (const auto& image : entries.images) {
433 if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) ||
434 !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) ||
435 !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) ||
436 !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) ||
437 !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) ||
438 !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) ||
439 !SaveObjectToPrecompiled(static_cast<u8>(image.IsAtomic() ? 1 : 0))) {
440 return false;
441 }
442 }
443 320
444 if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) { 321 dump.binary.resize(binary_length);
445 return false; 322 if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) {
446 } 323 return {};
447 for (const auto& gmem : entries.global_memory_entries) {
448 if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) ||
449 !SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) ||
450 !SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) {
451 return false;
452 }
453 }
454
455 for (const bool clip_distance : entries.clip_distances) {
456 if (!SaveObjectToPrecompiled(clip_distance)) {
457 return false;
458 } 324 }
459 }
460 325
461 if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { 326 dumps.emplace(std::move(usage), dump);
462 return false;
463 } 327 }
464 328 return dumps;
465 return true;
466} 329}
467 330
468void ShaderDiskCacheOpenGL::InvalidateTransferable() { 331void ShaderDiskCacheOpenGL::InvalidateTransferable() {
@@ -494,10 +357,11 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
494 } 357 }
495 358
496 FileUtil::IOFile file = AppendTransferableFile(); 359 FileUtil::IOFile file = AppendTransferableFile();
497 if (!file.IsOpen()) 360 if (!file.IsOpen()) {
498 return; 361 return;
362 }
499 if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) { 363 if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) {
500 LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry - removing"); 364 LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
501 file.Close(); 365 file.Close();
502 InvalidateTransferable(); 366 InvalidateTransferable();
503 return; 367 return;
@@ -523,29 +387,39 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
523 FileUtil::IOFile file = AppendTransferableFile(); 387 FileUtil::IOFile file = AppendTransferableFile();
524 if (!file.IsOpen()) 388 if (!file.IsOpen())
525 return; 389 return;
526 390 const auto Close = [&] {
527 if (file.WriteObject(TransferableEntryKind::Usage) != 1 || file.WriteObject(usage) != 1) { 391 LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing");
528 LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry - removing");
529 file.Close(); 392 file.Close();
530 InvalidateTransferable(); 393 InvalidateTransferable();
531 return; 394 };
532 }
533}
534 395
535void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::string& code, 396 if (file.WriteObject(TransferableEntryKind::Usage) != 1 ||
536 const GLShader::ShaderEntries& entries) { 397 file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 ||
537 if (!is_usable) { 398 file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 ||
399 file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 ||
400 file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) {
401 Close();
538 return; 402 return;
539 } 403 }
540 404 for (const auto& [pair, value] : usage.keys) {
541 if (precompiled_cache_virtual_file.GetSize() == 0) { 405 const auto [cbuf, offset] = pair;
542 SavePrecompiledHeaderToVirtualPrecompiledCache(); 406 if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) {
407 Close();
408 return;
409 }
543 } 410 }
544 411 for (const auto& [offset, sampler] : usage.bound_samplers) {
545 if (!SaveDecompiledFile(unique_identifier, code, entries)) { 412 if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) {
546 LOG_ERROR(Render_OpenGL, 413 Close();
547 "Failed to save decompiled entry to the precompiled file - removing"); 414 return;
548 InvalidatePrecompiled(); 415 }
416 }
417 for (const auto& [pair, sampler] : usage.bindless_samplers) {
418 const auto [cbuf, offset] = pair;
419 if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) {
420 Close();
421 return;
422 }
549 } 423 }
550} 424}
551 425
@@ -554,6 +428,13 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
554 return; 428 return;
555 } 429 }
556 430
431 // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header
432 // when writing the dump. This should be done the moment I get access to write to the virtual
433 // file.
434 if (precompiled_cache_virtual_file.GetSize() == 0) {
435 SavePrecompiledHeaderToVirtualPrecompiledCache();
436 }
437
557 GLint binary_length{}; 438 GLint binary_length{};
558 glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); 439 glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
559 440
@@ -561,21 +442,51 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
561 std::vector<u8> binary(binary_length); 442 std::vector<u8> binary(binary_length);
562 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); 443 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
563 444
564 if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Dump)) || 445 const auto Close = [&] {
565 !SaveObjectToPrecompiled(usage) || 446 LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
566 !SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
567 !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
568 !SaveArrayToPrecompiled(binary.data(), binary.size())) {
569 LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing",
570 usage.unique_identifier); 447 usage.unique_identifier);
571 InvalidatePrecompiled(); 448 InvalidatePrecompiled();
449 };
450
451 if (!SaveObjectToPrecompiled(usage.unique_identifier) ||
452 !SaveObjectToPrecompiled(usage.variant) ||
453 !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) ||
454 !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) ||
455 !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) {
456 Close();
572 return; 457 return;
573 } 458 }
459 for (const auto& [pair, value] : usage.keys) {
460 const auto [cbuf, offset] = pair;
461 if (SaveObjectToPrecompiled(ConstBufferKey{cbuf, offset, value}) != 1) {
462 Close();
463 return;
464 }
465 }
466 for (const auto& [offset, sampler] : usage.bound_samplers) {
467 if (SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler}) != 1) {
468 Close();
469 return;
470 }
471 }
472 for (const auto& [pair, sampler] : usage.bindless_samplers) {
473 const auto [cbuf, offset] = pair;
474 if (SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler}) != 1) {
475 Close();
476 return;
477 }
478 }
479 if (!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
480 !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
481 !SaveArrayToPrecompiled(binary.data(), binary.size())) {
482 Close();
483 }
574} 484}
575 485
576FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { 486FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
577 if (!EnsureDirectories()) 487 if (!EnsureDirectories()) {
578 return {}; 488 return {};
489 }
579 490
580 const auto transferable_path{GetTransferablePath()}; 491 const auto transferable_path{GetTransferablePath()};
581 const bool existed = FileUtil::Exists(transferable_path); 492 const bool existed = FileUtil::Exists(transferable_path);
@@ -607,8 +518,8 @@ void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
607 518
608void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { 519void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
609 precompiled_cache_virtual_file_offset = 0; 520 precompiled_cache_virtual_file_offset = 0;
610 const std::vector<u8>& uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); 521 const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
611 const std::vector<u8>& compressed = 522 const std::vector<u8> compressed =
612 Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); 523 Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
613 524
614 const auto precompiled_path{GetPrecompiledPath()}; 525 const auto precompiled_path{GetPrecompiledPath()};
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 9595bd71b..db23ada93 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -8,6 +8,7 @@
8#include <optional> 8#include <optional>
9#include <string> 9#include <string>
10#include <tuple> 10#include <tuple>
11#include <type_traits>
11#include <unordered_map> 12#include <unordered_map>
12#include <unordered_set> 13#include <unordered_set>
13#include <utility> 14#include <utility>
@@ -19,6 +20,7 @@
19#include "common/common_types.h" 20#include "common/common_types.h"
20#include "core/file_sys/vfs_vector.h" 21#include "core/file_sys/vfs_vector.h"
21#include "video_core/renderer_opengl/gl_shader_gen.h" 22#include "video_core/renderer_opengl/gl_shader_gen.h"
23#include "video_core/shader/const_buffer_locker.h"
22 24
23namespace Core { 25namespace Core {
24class System; 26class System;
@@ -53,6 +55,7 @@ struct BaseBindings {
53 return !operator==(rhs); 55 return !operator==(rhs);
54 } 56 }
55}; 57};
58static_assert(std::is_trivially_copyable_v<BaseBindings>);
56 59
57/// Describes the different variants a single program can be compiled. 60/// Describes the different variants a single program can be compiled.
58struct ProgramVariant { 61struct ProgramVariant {
@@ -70,13 +73,20 @@ struct ProgramVariant {
70 } 73 }
71}; 74};
72 75
76static_assert(std::is_trivially_copyable_v<ProgramVariant>);
77
73/// Describes how a shader is used. 78/// Describes how a shader is used.
74struct ShaderDiskCacheUsage { 79struct ShaderDiskCacheUsage {
75 u64 unique_identifier{}; 80 u64 unique_identifier{};
76 ProgramVariant variant; 81 ProgramVariant variant;
82 VideoCommon::Shader::KeyMap keys;
83 VideoCommon::Shader::BoundSamplerMap bound_samplers;
84 VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
77 85
78 bool operator==(const ShaderDiskCacheUsage& rhs) const { 86 bool operator==(const ShaderDiskCacheUsage& rhs) const {
79 return std::tie(unique_identifier, variant) == std::tie(rhs.unique_identifier, rhs.variant); 87 return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) ==
88 std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers,
89 rhs.bindless_samplers);
80 } 90 }
81 91
82 bool operator!=(const ShaderDiskCacheUsage& rhs) const { 92 bool operator!=(const ShaderDiskCacheUsage& rhs) const {
@@ -123,8 +133,7 @@ namespace OpenGL {
123class ShaderDiskCacheRaw { 133class ShaderDiskCacheRaw {
124public: 134public:
125 explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, 135 explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
126 u32 program_code_size, u32 program_code_size_b, 136 ProgramCode program_code, ProgramCode program_code_b = {});
127 ProgramCode program_code, ProgramCode program_code_b);
128 ShaderDiskCacheRaw(); 137 ShaderDiskCacheRaw();
129 ~ShaderDiskCacheRaw(); 138 ~ShaderDiskCacheRaw();
130 139
@@ -155,22 +164,14 @@ public:
155private: 164private:
156 u64 unique_identifier{}; 165 u64 unique_identifier{};
157 ProgramType program_type{}; 166 ProgramType program_type{};
158 u32 program_code_size{};
159 u32 program_code_size_b{};
160 167
161 ProgramCode program_code; 168 ProgramCode program_code;
162 ProgramCode program_code_b; 169 ProgramCode program_code_b;
163}; 170};
164 171
165/// Contains decompiled data from a shader
166struct ShaderDiskCacheDecompiled {
167 std::string code;
168 GLShader::ShaderEntries entries;
169};
170
171/// Contains an OpenGL dumped binary program 172/// Contains an OpenGL dumped binary program
172struct ShaderDiskCacheDump { 173struct ShaderDiskCacheDump {
173 GLenum binary_format; 174 GLenum binary_format{};
174 std::vector<u8> binary; 175 std::vector<u8> binary;
175}; 176};
176 177
@@ -184,9 +185,7 @@ public:
184 LoadTransferable(); 185 LoadTransferable();
185 186
186 /// Loads current game's precompiled cache. Invalidates on failure. 187 /// Loads current game's precompiled cache. Invalidates on failure.
187 std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, 188 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled();
188 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
189 LoadPrecompiled();
190 189
191 /// Removes the transferable (and precompiled) cache file. 190 /// Removes the transferable (and precompiled) cache file.
192 void InvalidateTransferable(); 191 void InvalidateTransferable();
@@ -200,10 +199,6 @@ public:
200 /// Saves shader usage to the transferable file. Does not check for collisions. 199 /// Saves shader usage to the transferable file. Does not check for collisions.
201 void SaveUsage(const ShaderDiskCacheUsage& usage); 200 void SaveUsage(const ShaderDiskCacheUsage& usage);
202 201
203 /// Saves a decompiled entry to the precompiled file. Does not check for collisions.
204 void SaveDecompiled(u64 unique_identifier, const std::string& code,
205 const GLShader::ShaderEntries& entries);
206
207 /// Saves a dump entry to the precompiled file. Does not check for collisions. 202 /// Saves a dump entry to the precompiled file. Does not check for collisions.
208 void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); 203 void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program);
209 204
@@ -212,18 +207,9 @@ public:
212 207
213private: 208private:
214 /// Loads the transferable cache. Returns empty on failure. 209 /// Loads the transferable cache. Returns empty on failure.
215 std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, 210 std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
216 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
217 LoadPrecompiledFile(FileUtil::IOFile& file); 211 LoadPrecompiledFile(FileUtil::IOFile& file);
218 212
219 /// Loads a decompiled cache entry from m_precompiled_cache_virtual_file. Returns empty on
220 /// failure.
221 std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry();
222
223 /// Saves a decompiled entry to the passed file. Returns true on success.
224 bool SaveDecompiledFile(u64 unique_identifier, const std::string& code,
225 const GLShader::ShaderEntries& entries);
226
227 /// Opens current game's transferable file and write it's header if it doesn't exist 213 /// Opens current game's transferable file and write it's header if it doesn't exist
228 FileUtil::IOFile AppendTransferableFile() const; 214 FileUtil::IOFile AppendTransferableFile() const;
229 215
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index b5a43e79e..0e22eede9 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -16,17 +16,8 @@ using VideoCommon::Shader::CompilerSettings;
16using VideoCommon::Shader::ProgramCode; 16using VideoCommon::Shader::ProgramCode;
17using VideoCommon::Shader::ShaderIR; 17using VideoCommon::Shader::ShaderIR;
18 18
19static constexpr u32 PROGRAM_OFFSET = 10; 19std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) {
20static constexpr u32 COMPUTE_OFFSET = 0; 20 std::string out = GetCommonDeclarations();
21
22static constexpr CompilerSettings settings{CompileDepth::NoFlowStack, true};
23
24ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) {
25 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
26
27 std::string out = "// Shader Unique Id: VS" + id + "\n\n";
28 out += GetCommonDeclarations();
29
30 out += R"( 21 out += R"(
31layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { 22layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
32 vec4 viewport_flip; 23 vec4 viewport_flip;
@@ -34,17 +25,10 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
34}; 25};
35 26
36)"; 27)";
37 28 const auto stage = ir_b ? ProgramType::VertexA : ProgramType::VertexB;
38 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); 29 out += Decompile(device, ir, stage, "vertex");
39 const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB; 30 if (ir_b) {
40 ProgramResult program = Decompile(device, program_ir, stage, "vertex"); 31 out += Decompile(device, *ir_b, ProgramType::VertexB, "vertex_b");
41 out += program.first;
42
43 if (setup.IsDualProgram()) {
44 const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b,
45 settings);
46 ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b");
47 out += program_b.first;
48 } 32 }
49 33
50 out += R"( 34 out += R"(
@@ -52,7 +36,7 @@ void main() {
52 execute_vertex(); 36 execute_vertex();
53)"; 37)";
54 38
55 if (setup.IsDualProgram()) { 39 if (ir_b) {
56 out += " execute_vertex_b();"; 40 out += " execute_vertex_b();";
57 } 41 }
58 42
@@ -66,17 +50,13 @@ void main() {
66 // Viewport can be flipped, which is unsupported by glViewport 50 // Viewport can be flipped, which is unsupported by glViewport
67 gl_Position.xy *= viewport_flip.xy; 51 gl_Position.xy *= viewport_flip.xy;
68 } 52 }
69})"; 53}
70 54)";
71 return {std::move(out), std::move(program.second)}; 55 return out;
72} 56}
73 57
74ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) { 58std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) {
75 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); 59 std::string out = GetCommonDeclarations();
76
77 std::string out = "// Shader Unique Id: GS" + id + "\n\n";
78 out += GetCommonDeclarations();
79
80 out += R"( 60 out += R"(
81layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { 61layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
82 vec4 viewport_flip; 62 vec4 viewport_flip;
@@ -84,25 +64,18 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
84}; 64};
85 65
86)"; 66)";
87 67 out += Decompile(device, ir, ProgramType::Geometry, "geometry");
88 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings);
89 ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry");
90 out += program.first;
91 68
92 out += R"( 69 out += R"(
93void main() { 70void main() {
94 execute_geometry(); 71 execute_geometry();
95};)"; 72}
96 73)";
97 return {std::move(out), std::move(program.second)}; 74 return out;
98} 75}
99 76
100ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) { 77std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) {
101 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); 78 std::string out = GetCommonDeclarations();
102
103 std::string out = "// Shader Unique Id: FS" + id + "\n\n";
104 out += GetCommonDeclarations();
105
106 out += R"( 79 out += R"(
107layout (location = 0) out vec4 FragColor0; 80layout (location = 0) out vec4 FragColor0;
108layout (location = 1) out vec4 FragColor1; 81layout (location = 1) out vec4 FragColor1;
@@ -119,36 +92,25 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
119}; 92};
120 93
121)"; 94)";
122 95 out += Decompile(device, ir, ProgramType::Fragment, "fragment");
123 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings);
124 ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment");
125 out += program.first;
126 96
127 out += R"( 97 out += R"(
128void main() { 98void main() {
129 execute_fragment(); 99 execute_fragment();
130} 100}
131
132)"; 101)";
133 return {std::move(out), std::move(program.second)}; 102 return out;
134} 103}
135 104
136ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) { 105std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) {
137 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); 106 std::string out = GetCommonDeclarations();
138 107 out += Decompile(device, ir, ProgramType::Compute, "compute");
139 std::string out = "// Shader Unique Id: CS" + id + "\n\n";
140 out += GetCommonDeclarations();
141
142 const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a, settings);
143 ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute");
144 out += program.first;
145
146 out += R"( 108 out += R"(
147void main() { 109void main() {
148 execute_compute(); 110 execute_compute();
149} 111}
150)"; 112)";
151 return {std::move(out), std::move(program.second)}; 113 return out;
152} 114}
153 115
154} // namespace OpenGL::GLShader 116} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 3833e88ab..cba2be9f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -17,44 +17,18 @@ class Device;
17namespace OpenGL::GLShader { 17namespace OpenGL::GLShader {
18 18
19using VideoCommon::Shader::ProgramCode; 19using VideoCommon::Shader::ProgramCode;
20 20using VideoCommon::Shader::ShaderIR;
21struct ShaderSetup {
22 explicit ShaderSetup(ProgramCode program_code) {
23 program.code = std::move(program_code);
24 }
25
26 struct {
27 ProgramCode code;
28 ProgramCode code_b; // Used for dual vertex shaders
29 u64 unique_identifier;
30 std::size_t size_a;
31 std::size_t size_b;
32 } program;
33
34 /// Used in scenarios where we have a dual vertex shaders
35 void SetProgramB(ProgramCode program_b) {
36 program.code_b = std::move(program_b);
37 has_program_b = true;
38 }
39
40 bool IsDualProgram() const {
41 return has_program_b;
42 }
43
44private:
45 bool has_program_b{};
46};
47 21
48/// Generates the GLSL vertex shader program source code for the given VS program 22/// Generates the GLSL vertex shader program source code for the given VS program
49ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup); 23std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b);
50 24
51/// Generates the GLSL geometry shader program source code for the given GS program 25/// Generates the GLSL geometry shader program source code for the given GS program
52ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup); 26std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir);
53 27
54/// Generates the GLSL fragment shader program source code for the given FS program 28/// Generates the GLSL fragment shader program source code for the given FS program
55ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); 29std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir);
56 30
57/// Generates the GLSL compute shader program source code for the given CS program 31/// Generates the GLSL compute shader program source code for the given CS program
58ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup); 32std::string GenerateComputeShader(const Device& device, const ShaderIR& ir);
59 33
60} // namespace OpenGL::GLShader 34} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 0d943a826..42cf068b6 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1704,6 +1704,13 @@ public:
1704 return expr.value ? decomp.v_true : decomp.v_false; 1704 return expr.value ? decomp.v_true : decomp.v_false;
1705 } 1705 }
1706 1706
1707 Id operator()(const ExprGprEqual& expr) {
1708 const Id target = decomp.Constant(decomp.t_uint, expr.value);
1709 const Id gpr = decomp.BitcastTo<Type::Uint>(
1710 decomp.Emit(decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr))));
1711 return decomp.Emit(decomp.OpLogicalEqual(decomp.t_uint, gpr, target));
1712 }
1713
1707 Id Visit(const Expr& node) { 1714 Id Visit(const Expr& node) {
1708 return std::visit(*this, *node); 1715 return std::visit(*this, *node);
1709 } 1716 }
diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp
index e43aecc18..3f96d9076 100644
--- a/src/video_core/shader/ast.cpp
+++ b/src/video_core/shader/ast.cpp
@@ -228,6 +228,10 @@ public:
228 inner += expr.value ? "true" : "false"; 228 inner += expr.value ? "true" : "false";
229 } 229 }
230 230
231 void operator()(const ExprGprEqual& expr) {
232 inner += "( gpr_" + std::to_string(expr.gpr) + " == " + std::to_string(expr.value) + ')';
233 }
234
231 const std::string& GetResult() const { 235 const std::string& GetResult() const {
232 return inner; 236 return inner;
233 } 237 }
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp
new file mode 100644
index 000000000..fe467608e
--- /dev/null
+++ b/src/video_core/shader/const_buffer_locker.cpp
@@ -0,0 +1,110 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <memory>
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/shader/const_buffer_locker.h"
13
14namespace VideoCommon::Shader {
15
16using Tegra::Engines::SamplerDescriptor;
17
18ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage)
19 : stage{shader_stage} {}
20
21ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
22 Tegra::Engines::ConstBufferEngineInterface& engine)
23 : stage{shader_stage}, engine{&engine} {}
24
25ConstBufferLocker::~ConstBufferLocker() = default;
26
27std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) {
28 const std::pair<u32, u32> key = {buffer, offset};
29 const auto iter = keys.find(key);
30 if (iter != keys.end()) {
31 return iter->second;
32 }
33 if (!engine) {
34 return std::nullopt;
35 }
36 const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
37 keys.emplace(key, value);
38 return value;
39}
40
41std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) {
42 const u32 key = offset;
43 const auto iter = bound_samplers.find(key);
44 if (iter != bound_samplers.end()) {
45 return iter->second;
46 }
47 if (!engine) {
48 return std::nullopt;
49 }
50 const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
51 bound_samplers.emplace(key, value);
52 return value;
53}
54
55std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler(
56 u32 buffer, u32 offset) {
57 const std::pair key = {buffer, offset};
58 const auto iter = bindless_samplers.find(key);
59 if (iter != bindless_samplers.end()) {
60 return iter->second;
61 }
62 if (!engine) {
63 return std::nullopt;
64 }
65 const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
66 bindless_samplers.emplace(key, value);
67 return value;
68}
69
70void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
71 keys.insert_or_assign({buffer, offset}, value);
72}
73
74void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
75 bound_samplers.insert_or_assign(offset, sampler);
76}
77
78void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
79 bindless_samplers.insert_or_assign({buffer, offset}, sampler);
80}
81
82bool ConstBufferLocker::IsConsistent() const {
83 if (!engine) {
84 return false;
85 }
86 return std::all_of(keys.begin(), keys.end(),
87 [this](const auto& pair) {
88 const auto [cbuf, offset] = pair.first;
89 const auto value = pair.second;
90 return value == engine->AccessConstBuffer32(stage, cbuf, offset);
91 }) &&
92 std::all_of(bound_samplers.begin(), bound_samplers.end(),
93 [this](const auto& sampler) {
94 const auto [key, value] = sampler;
95 return value == engine->AccessBoundSampler(stage, key);
96 }) &&
97 std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
98 [this](const auto& sampler) {
99 const auto [cbuf, offset] = sampler.first;
100 const auto value = sampler.second;
101 return value == engine->AccessBindlessSampler(stage, cbuf, offset);
102 });
103}
104
105bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const {
106 return keys == rhs.keys && bound_samplers == rhs.bound_samplers &&
107 bindless_samplers == rhs.bindless_samplers;
108}
109
110} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
new file mode 100644
index 000000000..600e2f3c3
--- /dev/null
+++ b/src/video_core/shader/const_buffer_locker.h
@@ -0,0 +1,80 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <unordered_map>
8#include "common/common_types.h"
9#include "common/hash.h"
10#include "video_core/engines/const_buffer_engine_interface.h"
11
12namespace VideoCommon::Shader {
13
14using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
15using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
16using BindlessSamplerMap =
17 std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
18
19/**
20 * The ConstBufferLocker is a class used to interface the 3D and compute engines with the shader
21 * compiler. With it, the shader can obtain required data from GPU state and store it for disk
22 * shader compilation.
23 **/
24class ConstBufferLocker {
25public:
26 explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage);
27
28 explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
29 Tegra::Engines::ConstBufferEngineInterface& engine);
30
31 ~ConstBufferLocker();
32
33 /// Retrieves a key from the locker. If it's registered, it will give the registered value; if
34 /// not, it will obtain it from the engine and register it.
35 std::optional<u32> ObtainKey(u32 buffer, u32 offset);
36
37 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
38
39 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
40
41 /// Inserts a key.
42 void InsertKey(u32 buffer, u32 offset, u32 value);
43
44 /// Inserts a bound sampler key.
45 void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
46
47 /// Inserts a bindless sampler key.
48 void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
49
50 /// Checks keys and samplers against engine's current const buffers. Returns true if they are
51 /// the same value, false otherwise.
52 bool IsConsistent() const;
53
54 /// Returns true if the keys are equal to the other ones in the locker.
55 bool HasEqualKeys(const ConstBufferLocker& rhs) const;
56
57 /// Gives a getter to the const buffer keys in the database.
58 const KeyMap& GetKeys() const {
59 return keys;
60 }
61
62 /// Gets samplers database.
63 const BoundSamplerMap& GetBoundSamplers() const {
64 return bound_samplers;
65 }
66
67 /// Gets bindless samplers database.
68 const BindlessSamplerMap& GetBindlessSamplers() const {
69 return bindless_samplers;
70 }
71
72private:
73 const Tegra::Engines::ShaderType stage;
74 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
75 KeyMap keys;
76 BoundSamplerMap bound_samplers;
77 BindlessSamplerMap bindless_samplers;
78};
79
80} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 9d21f45de..d47c63d9f 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -35,14 +35,20 @@ struct BlockStack {
35 std::stack<u32> pbk_stack{}; 35 std::stack<u32> pbk_stack{};
36}; 36};
37 37
38struct BlockBranchInfo { 38template <typename T, typename... Args>
39 Condition condition{}; 39BlockBranchInfo MakeBranchInfo(Args&&... args) {
40 s32 address{exit_branch}; 40 static_assert(std::is_convertible_v<T, BranchData>);
41 bool kill{}; 41 return std::make_shared<BranchData>(T(std::forward<Args>(args)...));
42 bool is_sync{}; 42}
43 bool is_brk{}; 43
44 bool ignore{}; 44bool BlockBranchIsIgnored(BlockBranchInfo first) {
45}; 45 bool ignore = false;
46 if (std::holds_alternative<SingleBranch>(*first)) {
47 const auto branch = std::get_if<SingleBranch>(first.get());
48 ignore = branch->ignore;
49 }
50 return ignore;
51}
46 52
47struct BlockInfo { 53struct BlockInfo {
48 u32 start{}; 54 u32 start{};
@@ -56,10 +62,11 @@ struct BlockInfo {
56}; 62};
57 63
58struct CFGRebuildState { 64struct CFGRebuildState {
59 explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, 65 explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker)
60 const u32 start) 66 : program_code{program_code}, start{start}, locker{locker} {}
61 : start{start}, program_code{program_code}, program_size{program_size} {}
62 67
68 const ProgramCode& program_code;
69 ConstBufferLocker& locker;
63 u32 start{}; 70 u32 start{};
64 std::vector<BlockInfo> block_info{}; 71 std::vector<BlockInfo> block_info{};
65 std::list<u32> inspect_queries{}; 72 std::list<u32> inspect_queries{};
@@ -69,8 +76,6 @@ struct CFGRebuildState {
69 std::map<u32, u32> ssy_labels{}; 76 std::map<u32, u32> ssy_labels{};
70 std::map<u32, u32> pbk_labels{}; 77 std::map<u32, u32> pbk_labels{};
71 std::unordered_map<u32, BlockStack> stacks{}; 78 std::unordered_map<u32, BlockStack> stacks{};
72 const ProgramCode& program_code;
73 const std::size_t program_size;
74 ASTManager* manager; 79 ASTManager* manager;
75}; 80};
76 81
@@ -124,10 +129,116 @@ enum class ParseResult : u32 {
124 AbnormalFlow, 129 AbnormalFlow,
125}; 130};
126 131
132struct BranchIndirectInfo {
133 u32 buffer{};
134 u32 offset{};
135 u32 entries{};
136 s32 relative_position{};
137};
138
139std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state,
140 u32 start_address, u32 current_position) {
141 const u32 shader_start = state.start;
142 u32 pos = current_position;
143 BranchIndirectInfo result{};
144 u64 track_register = 0;
145
146 // Step 0 Get BRX Info
147 const Instruction instr = {state.program_code[pos]};
148 const auto opcode = OpCode::Decode(instr);
149 if (opcode->get().GetId() != OpCode::Id::BRX) {
150 return std::nullopt;
151 }
152 if (instr.brx.constant_buffer != 0) {
153 return std::nullopt;
154 }
155 track_register = instr.gpr8.Value();
156 result.relative_position = instr.brx.GetBranchExtend();
157 pos--;
158 bool found_track = false;
159
160 // Step 1 Track LDC
161 while (pos >= shader_start) {
162 if (IsSchedInstruction(pos, shader_start)) {
163 pos--;
164 continue;
165 }
166 const Instruction instr = {state.program_code[pos]};
167 const auto opcode = OpCode::Decode(instr);
168 if (opcode->get().GetId() == OpCode::Id::LD_C) {
169 if (instr.gpr0.Value() == track_register &&
170 instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single) {
171 result.buffer = instr.cbuf36.index.Value();
172 result.offset = static_cast<u32>(instr.cbuf36.GetOffset());
173 track_register = instr.gpr8.Value();
174 pos--;
175 found_track = true;
176 break;
177 }
178 }
179 pos--;
180 }
181
182 if (!found_track) {
183 return std::nullopt;
184 }
185 found_track = false;
186
187 // Step 2 Track SHL
188 while (pos >= shader_start) {
189 if (IsSchedInstruction(pos, shader_start)) {
190 pos--;
191 continue;
192 }
193 const Instruction instr = state.program_code[pos];
194 const auto opcode = OpCode::Decode(instr);
195 if (opcode->get().GetId() == OpCode::Id::SHL_IMM) {
196 if (instr.gpr0.Value() == track_register) {
197 track_register = instr.gpr8.Value();
198 pos--;
199 found_track = true;
200 break;
201 }
202 }
203 pos--;
204 }
205
206 if (!found_track) {
207 return std::nullopt;
208 }
209 found_track = false;
210
211 // Step 3 Track IMNMX
212 while (pos >= shader_start) {
213 if (IsSchedInstruction(pos, shader_start)) {
214 pos--;
215 continue;
216 }
217 const Instruction instr = state.program_code[pos];
218 const auto opcode = OpCode::Decode(instr);
219 if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) {
220 if (instr.gpr0.Value() == track_register) {
221 track_register = instr.gpr8.Value();
222 result.entries = instr.alu.GetSignedImm20_20() + 1;
223 pos--;
224 found_track = true;
225 break;
226 }
227 }
228 pos--;
229 }
230
231 if (!found_track) {
232 return std::nullopt;
233 }
234 return result;
235}
236
127std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { 237std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
128 u32 offset = static_cast<u32>(address); 238 u32 offset = static_cast<u32>(address);
129 const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction)); 239 const u32 end_address = static_cast<u32>(state.program_code.size());
130 ParseInfo parse_info{}; 240 ParseInfo parse_info{};
241 SingleBranch single_branch{};
131 242
132 const auto insert_label = [](CFGRebuildState& state, u32 address) { 243 const auto insert_label = [](CFGRebuildState& state, u32 address) {
133 const auto pair = state.labels.emplace(address); 244 const auto pair = state.labels.emplace(address);
@@ -140,13 +251,14 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
140 if (offset >= end_address) { 251 if (offset >= end_address) {
141 // ASSERT_OR_EXECUTE can't be used, as it ignores the break 252 // ASSERT_OR_EXECUTE can't be used, as it ignores the break
142 ASSERT_MSG(false, "Shader passed the current limit!"); 253 ASSERT_MSG(false, "Shader passed the current limit!");
143 parse_info.branch_info.address = exit_branch; 254
144 parse_info.branch_info.ignore = false; 255 single_branch.address = exit_branch;
256 single_branch.ignore = false;
145 break; 257 break;
146 } 258 }
147 if (state.registered.count(offset) != 0) { 259 if (state.registered.count(offset) != 0) {
148 parse_info.branch_info.address = offset; 260 single_branch.address = offset;
149 parse_info.branch_info.ignore = true; 261 single_branch.ignore = true;
150 break; 262 break;
151 } 263 }
152 if (IsSchedInstruction(offset, state.start)) { 264 if (IsSchedInstruction(offset, state.start)) {
@@ -163,24 +275,26 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
163 switch (opcode->get().GetId()) { 275 switch (opcode->get().GetId()) {
164 case OpCode::Id::EXIT: { 276 case OpCode::Id::EXIT: {
165 const auto pred_index = static_cast<u32>(instr.pred.pred_index); 277 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
166 parse_info.branch_info.condition.predicate = 278 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
167 GetPredicate(pred_index, instr.negate_pred != 0); 279 if (single_branch.condition.predicate == Pred::NeverExecute) {
168 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
169 offset++; 280 offset++;
170 continue; 281 continue;
171 } 282 }
172 const ConditionCode cc = instr.flow_condition_code; 283 const ConditionCode cc = instr.flow_condition_code;
173 parse_info.branch_info.condition.cc = cc; 284 single_branch.condition.cc = cc;
174 if (cc == ConditionCode::F) { 285 if (cc == ConditionCode::F) {
175 offset++; 286 offset++;
176 continue; 287 continue;
177 } 288 }
178 parse_info.branch_info.address = exit_branch; 289 single_branch.address = exit_branch;
179 parse_info.branch_info.kill = false; 290 single_branch.kill = false;
180 parse_info.branch_info.is_sync = false; 291 single_branch.is_sync = false;
181 parse_info.branch_info.is_brk = false; 292 single_branch.is_brk = false;
182 parse_info.branch_info.ignore = false; 293 single_branch.ignore = false;
183 parse_info.end_address = offset; 294 parse_info.end_address = offset;
295 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
296 single_branch.condition, single_branch.address, single_branch.kill,
297 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
184 298
185 return {ParseResult::ControlCaught, parse_info}; 299 return {ParseResult::ControlCaught, parse_info};
186 } 300 }
@@ -189,99 +303,107 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
189 return {ParseResult::AbnormalFlow, parse_info}; 303 return {ParseResult::AbnormalFlow, parse_info};
190 } 304 }
191 const auto pred_index = static_cast<u32>(instr.pred.pred_index); 305 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
192 parse_info.branch_info.condition.predicate = 306 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
193 GetPredicate(pred_index, instr.negate_pred != 0); 307 if (single_branch.condition.predicate == Pred::NeverExecute) {
194 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
195 offset++; 308 offset++;
196 continue; 309 continue;
197 } 310 }
198 const ConditionCode cc = instr.flow_condition_code; 311 const ConditionCode cc = instr.flow_condition_code;
199 parse_info.branch_info.condition.cc = cc; 312 single_branch.condition.cc = cc;
200 if (cc == ConditionCode::F) { 313 if (cc == ConditionCode::F) {
201 offset++; 314 offset++;
202 continue; 315 continue;
203 } 316 }
204 const u32 branch_offset = offset + instr.bra.GetBranchTarget(); 317 const u32 branch_offset = offset + instr.bra.GetBranchTarget();
205 if (branch_offset == 0) { 318 if (branch_offset == 0) {
206 parse_info.branch_info.address = exit_branch; 319 single_branch.address = exit_branch;
207 } else { 320 } else {
208 parse_info.branch_info.address = branch_offset; 321 single_branch.address = branch_offset;
209 } 322 }
210 insert_label(state, branch_offset); 323 insert_label(state, branch_offset);
211 parse_info.branch_info.kill = false; 324 single_branch.kill = false;
212 parse_info.branch_info.is_sync = false; 325 single_branch.is_sync = false;
213 parse_info.branch_info.is_brk = false; 326 single_branch.is_brk = false;
214 parse_info.branch_info.ignore = false; 327 single_branch.ignore = false;
215 parse_info.end_address = offset; 328 parse_info.end_address = offset;
329 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
330 single_branch.condition, single_branch.address, single_branch.kill,
331 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
216 332
217 return {ParseResult::ControlCaught, parse_info}; 333 return {ParseResult::ControlCaught, parse_info};
218 } 334 }
219 case OpCode::Id::SYNC: { 335 case OpCode::Id::SYNC: {
220 const auto pred_index = static_cast<u32>(instr.pred.pred_index); 336 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
221 parse_info.branch_info.condition.predicate = 337 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
222 GetPredicate(pred_index, instr.negate_pred != 0); 338 if (single_branch.condition.predicate == Pred::NeverExecute) {
223 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
224 offset++; 339 offset++;
225 continue; 340 continue;
226 } 341 }
227 const ConditionCode cc = instr.flow_condition_code; 342 const ConditionCode cc = instr.flow_condition_code;
228 parse_info.branch_info.condition.cc = cc; 343 single_branch.condition.cc = cc;
229 if (cc == ConditionCode::F) { 344 if (cc == ConditionCode::F) {
230 offset++; 345 offset++;
231 continue; 346 continue;
232 } 347 }
233 parse_info.branch_info.address = unassigned_branch; 348 single_branch.address = unassigned_branch;
234 parse_info.branch_info.kill = false; 349 single_branch.kill = false;
235 parse_info.branch_info.is_sync = true; 350 single_branch.is_sync = true;
236 parse_info.branch_info.is_brk = false; 351 single_branch.is_brk = false;
237 parse_info.branch_info.ignore = false; 352 single_branch.ignore = false;
238 parse_info.end_address = offset; 353 parse_info.end_address = offset;
354 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
355 single_branch.condition, single_branch.address, single_branch.kill,
356 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
239 357
240 return {ParseResult::ControlCaught, parse_info}; 358 return {ParseResult::ControlCaught, parse_info};
241 } 359 }
242 case OpCode::Id::BRK: { 360 case OpCode::Id::BRK: {
243 const auto pred_index = static_cast<u32>(instr.pred.pred_index); 361 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
244 parse_info.branch_info.condition.predicate = 362 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
245 GetPredicate(pred_index, instr.negate_pred != 0); 363 if (single_branch.condition.predicate == Pred::NeverExecute) {
246 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
247 offset++; 364 offset++;
248 continue; 365 continue;
249 } 366 }
250 const ConditionCode cc = instr.flow_condition_code; 367 const ConditionCode cc = instr.flow_condition_code;
251 parse_info.branch_info.condition.cc = cc; 368 single_branch.condition.cc = cc;
252 if (cc == ConditionCode::F) { 369 if (cc == ConditionCode::F) {
253 offset++; 370 offset++;
254 continue; 371 continue;
255 } 372 }
256 parse_info.branch_info.address = unassigned_branch; 373 single_branch.address = unassigned_branch;
257 parse_info.branch_info.kill = false; 374 single_branch.kill = false;
258 parse_info.branch_info.is_sync = false; 375 single_branch.is_sync = false;
259 parse_info.branch_info.is_brk = true; 376 single_branch.is_brk = true;
260 parse_info.branch_info.ignore = false; 377 single_branch.ignore = false;
261 parse_info.end_address = offset; 378 parse_info.end_address = offset;
379 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
380 single_branch.condition, single_branch.address, single_branch.kill,
381 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
262 382
263 return {ParseResult::ControlCaught, parse_info}; 383 return {ParseResult::ControlCaught, parse_info};
264 } 384 }
265 case OpCode::Id::KIL: { 385 case OpCode::Id::KIL: {
266 const auto pred_index = static_cast<u32>(instr.pred.pred_index); 386 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
267 parse_info.branch_info.condition.predicate = 387 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
268 GetPredicate(pred_index, instr.negate_pred != 0); 388 if (single_branch.condition.predicate == Pred::NeverExecute) {
269 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
270 offset++; 389 offset++;
271 continue; 390 continue;
272 } 391 }
273 const ConditionCode cc = instr.flow_condition_code; 392 const ConditionCode cc = instr.flow_condition_code;
274 parse_info.branch_info.condition.cc = cc; 393 single_branch.condition.cc = cc;
275 if (cc == ConditionCode::F) { 394 if (cc == ConditionCode::F) {
276 offset++; 395 offset++;
277 continue; 396 continue;
278 } 397 }
279 parse_info.branch_info.address = exit_branch; 398 single_branch.address = exit_branch;
280 parse_info.branch_info.kill = true; 399 single_branch.kill = true;
281 parse_info.branch_info.is_sync = false; 400 single_branch.is_sync = false;
282 parse_info.branch_info.is_brk = false; 401 single_branch.is_brk = false;
283 parse_info.branch_info.ignore = false; 402 single_branch.ignore = false;
284 parse_info.end_address = offset; 403 parse_info.end_address = offset;
404 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
405 single_branch.condition, single_branch.address, single_branch.kill,
406 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
285 407
286 return {ParseResult::ControlCaught, parse_info}; 408 return {ParseResult::ControlCaught, parse_info};
287 } 409 }
@@ -298,6 +420,29 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
298 break; 420 break;
299 } 421 }
300 case OpCode::Id::BRX: { 422 case OpCode::Id::BRX: {
423 auto tmp = TrackBranchIndirectInfo(state, address, offset);
424 if (tmp) {
425 auto result = *tmp;
426 std::vector<CaseBranch> branches{};
427 s32 pc_target = offset + result.relative_position;
428 for (u32 i = 0; i < result.entries; i++) {
429 auto k = state.locker.ObtainKey(result.buffer, result.offset + i * 4);
430 if (!k) {
431 return {ParseResult::AbnormalFlow, parse_info};
432 }
433 u32 value = *k;
434 u32 target = static_cast<u32>((value >> 3) + pc_target);
435 insert_label(state, target);
436 branches.emplace_back(value, target);
437 }
438 parse_info.end_address = offset;
439 parse_info.branch_info = MakeBranchInfo<MultiBranch>(
440 static_cast<u32>(instr.gpr8.Value()), std::move(branches));
441
442 return {ParseResult::ControlCaught, parse_info};
443 } else {
444 LOG_WARNING(HW_GPU, "BRX Track Unsuccesful");
445 }
301 return {ParseResult::AbnormalFlow, parse_info}; 446 return {ParseResult::AbnormalFlow, parse_info};
302 } 447 }
303 default: 448 default:
@@ -306,10 +451,13 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
306 451
307 offset++; 452 offset++;
308 } 453 }
309 parse_info.branch_info.kill = false; 454 single_branch.kill = false;
310 parse_info.branch_info.is_sync = false; 455 single_branch.is_sync = false;
311 parse_info.branch_info.is_brk = false; 456 single_branch.is_brk = false;
312 parse_info.end_address = offset - 1; 457 parse_info.end_address = offset - 1;
458 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
459 single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync,
460 single_branch.is_brk, single_branch.ignore);
313 return {ParseResult::BlockEnd, parse_info}; 461 return {ParseResult::BlockEnd, parse_info};
314} 462}
315 463
@@ -333,9 +481,10 @@ bool TryInspectAddress(CFGRebuildState& state) {
333 BlockInfo& current_block = state.block_info[block_index]; 481 BlockInfo& current_block = state.block_info[block_index];
334 current_block.end = address - 1; 482 current_block.end = address - 1;
335 new_block.branch = current_block.branch; 483 new_block.branch = current_block.branch;
336 BlockBranchInfo forward_branch{}; 484 BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>();
337 forward_branch.address = address; 485 const auto branch = std::get_if<SingleBranch>(forward_branch.get());
338 forward_branch.ignore = true; 486 branch->address = address;
487 branch->ignore = true;
339 current_block.branch = forward_branch; 488 current_block.branch = forward_branch;
340 return true; 489 return true;
341 } 490 }
@@ -350,12 +499,15 @@ bool TryInspectAddress(CFGRebuildState& state) {
350 499
351 BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); 500 BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address);
352 block_info.branch = parse_info.branch_info; 501 block_info.branch = parse_info.branch_info;
353 if (parse_info.branch_info.condition.IsUnconditional()) { 502 if (std::holds_alternative<SingleBranch>(*block_info.branch)) {
503 const auto branch = std::get_if<SingleBranch>(block_info.branch.get());
504 if (branch->condition.IsUnconditional()) {
505 return true;
506 }
507 const u32 fallthrough_address = parse_info.end_address + 1;
508 state.inspect_queries.push_front(fallthrough_address);
354 return true; 509 return true;
355 } 510 }
356
357 const u32 fallthrough_address = parse_info.end_address + 1;
358 state.inspect_queries.push_front(fallthrough_address);
359 return true; 511 return true;
360} 512}
361 513
@@ -393,31 +545,42 @@ bool TryQuery(CFGRebuildState& state) {
393 state.queries.pop_front(); 545 state.queries.pop_front();
394 gather_labels(q2.ssy_stack, state.ssy_labels, block); 546 gather_labels(q2.ssy_stack, state.ssy_labels, block);
395 gather_labels(q2.pbk_stack, state.pbk_labels, block); 547 gather_labels(q2.pbk_stack, state.pbk_labels, block);
396 if (!block.branch.condition.IsUnconditional()) { 548 if (std::holds_alternative<SingleBranch>(*block.branch)) {
397 q2.address = block.end + 1; 549 const auto branch = std::get_if<SingleBranch>(block.branch.get());
398 state.queries.push_back(q2); 550 if (!branch->condition.IsUnconditional()) {
399 } 551 q2.address = block.end + 1;
552 state.queries.push_back(q2);
553 }
400 554
401 Query conditional_query{q2}; 555 Query conditional_query{q2};
402 if (block.branch.is_sync) { 556 if (branch->is_sync) {
403 if (block.branch.address == unassigned_branch) { 557 if (branch->address == unassigned_branch) {
404 block.branch.address = conditional_query.ssy_stack.top(); 558 branch->address = conditional_query.ssy_stack.top();
559 }
560 conditional_query.ssy_stack.pop();
405 } 561 }
406 conditional_query.ssy_stack.pop(); 562 if (branch->is_brk) {
407 } 563 if (branch->address == unassigned_branch) {
408 if (block.branch.is_brk) { 564 branch->address = conditional_query.pbk_stack.top();
409 if (block.branch.address == unassigned_branch) { 565 }
410 block.branch.address = conditional_query.pbk_stack.top(); 566 conditional_query.pbk_stack.pop();
411 } 567 }
412 conditional_query.pbk_stack.pop(); 568 conditional_query.address = branch->address;
569 state.queries.push_back(std::move(conditional_query));
570 return true;
571 }
572 const auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
573 for (const auto& branch_case : multi_branch->branches) {
574 Query conditional_query{q2};
575 conditional_query.address = branch_case.address;
576 state.queries.push_back(std::move(conditional_query));
413 } 577 }
414 conditional_query.address = block.branch.address;
415 state.queries.push_back(std::move(conditional_query));
416 return true; 578 return true;
417} 579}
580
418} // Anonymous namespace 581} // Anonymous namespace
419 582
420void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch) { 583void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
421 const auto get_expr = ([&](const Condition& cond) -> Expr { 584 const auto get_expr = ([&](const Condition& cond) -> Expr {
422 Expr result{}; 585 Expr result{};
423 if (cond.cc != ConditionCode::T) { 586 if (cond.cc != ConditionCode::T) {
@@ -444,15 +607,24 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch) {
444 } 607 }
445 return MakeExpr<ExprBoolean>(true); 608 return MakeExpr<ExprBoolean>(true);
446 }); 609 });
447 if (branch.address < 0) { 610 if (std::holds_alternative<SingleBranch>(*branch_info)) {
448 if (branch.kill) { 611 const auto branch = std::get_if<SingleBranch>(branch_info.get());
449 mm.InsertReturn(get_expr(branch.condition), true); 612 if (branch->address < 0) {
613 if (branch->kill) {
614 mm.InsertReturn(get_expr(branch->condition), true);
615 return;
616 }
617 mm.InsertReturn(get_expr(branch->condition), false);
450 return; 618 return;
451 } 619 }
452 mm.InsertReturn(get_expr(branch.condition), false); 620 mm.InsertGoto(get_expr(branch->condition), branch->address);
453 return; 621 return;
454 } 622 }
455 mm.InsertGoto(get_expr(branch.condition), branch.address); 623 const auto multi_branch = std::get_if<MultiBranch>(branch_info.get());
624 for (const auto& branch_case : multi_branch->branches) {
625 mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value),
626 branch_case.address);
627 }
456} 628}
457 629
458void DecompileShader(CFGRebuildState& state) { 630void DecompileShader(CFGRebuildState& state) {
@@ -464,25 +636,26 @@ void DecompileShader(CFGRebuildState& state) {
464 if (state.labels.count(block.start) != 0) { 636 if (state.labels.count(block.start) != 0) {
465 state.manager->InsertLabel(block.start); 637 state.manager->InsertLabel(block.start);
466 } 638 }
467 u32 end = block.branch.ignore ? block.end + 1 : block.end; 639 const bool ignore = BlockBranchIsIgnored(block.branch);
640 u32 end = ignore ? block.end + 1 : block.end;
468 state.manager->InsertBlock(block.start, end); 641 state.manager->InsertBlock(block.start, end);
469 if (!block.branch.ignore) { 642 if (!ignore) {
470 InsertBranch(*state.manager, block.branch); 643 InsertBranch(*state.manager, block.branch);
471 } 644 }
472 } 645 }
473 state.manager->Decompile(); 646 state.manager->Decompile();
474} 647}
475 648
476std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, 649std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
477 std::size_t program_size, u32 start_address, 650 const CompilerSettings& settings,
478 const CompilerSettings& settings) { 651 ConstBufferLocker& locker) {
479 auto result_out = std::make_unique<ShaderCharacteristics>(); 652 auto result_out = std::make_unique<ShaderCharacteristics>();
480 if (settings.depth == CompileDepth::BruteForce) { 653 if (settings.depth == CompileDepth::BruteForce) {
481 result_out->settings.depth = CompileDepth::BruteForce; 654 result_out->settings.depth = CompileDepth::BruteForce;
482 return result_out; 655 return result_out;
483 } 656 }
484 657
485 CFGRebuildState state{program_code, program_size, start_address}; 658 CFGRebuildState state{program_code, start_address, locker};
486 // Inspect Code and generate blocks 659 // Inspect Code and generate blocks
487 state.labels.clear(); 660 state.labels.clear();
488 state.labels.emplace(start_address); 661 state.labels.emplace(start_address);
@@ -547,11 +720,9 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
547 ShaderBlock new_block{}; 720 ShaderBlock new_block{};
548 new_block.start = block.start; 721 new_block.start = block.start;
549 new_block.end = block.end; 722 new_block.end = block.end;
550 new_block.ignore_branch = block.branch.ignore; 723 new_block.ignore_branch = BlockBranchIsIgnored(block.branch);
551 if (!new_block.ignore_branch) { 724 if (!new_block.ignore_branch) {
552 new_block.branch.cond = block.branch.condition; 725 new_block.branch = block.branch;
553 new_block.branch.kills = block.branch.kill;
554 new_block.branch.address = block.branch.address;
555 } 726 }
556 result_out->end = std::max(result_out->end, block.end); 727 result_out->end = std::max(result_out->end, block.end);
557 result_out->blocks.push_back(new_block); 728 result_out->blocks.push_back(new_block);
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 37e987d62..5304998b9 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -7,6 +7,7 @@
7#include <list> 7#include <list>
8#include <optional> 8#include <optional>
9#include <set> 9#include <set>
10#include <variant>
10 11
11#include "video_core/engines/shader_bytecode.h" 12#include "video_core/engines/shader_bytecode.h"
12#include "video_core/shader/ast.h" 13#include "video_core/shader/ast.h"
@@ -37,29 +38,61 @@ struct Condition {
37 } 38 }
38}; 39};
39 40
40struct ShaderBlock { 41class SingleBranch {
41 struct Branch { 42public:
42 Condition cond{}; 43 SingleBranch() = default;
43 bool kills{}; 44 SingleBranch(Condition condition, s32 address, bool kill, bool is_sync, bool is_brk,
44 s32 address{}; 45 bool ignore)
46 : condition{condition}, address{address}, kill{kill}, is_sync{is_sync}, is_brk{is_brk},
47 ignore{ignore} {}
48
49 bool operator==(const SingleBranch& b) const {
50 return std::tie(condition, address, kill, is_sync, is_brk, ignore) ==
51 std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore);
52 }
53
54 bool operator!=(const SingleBranch& b) const {
55 return !operator==(b);
56 }
57
58 Condition condition{};
59 s32 address{exit_branch};
60 bool kill{};
61 bool is_sync{};
62 bool is_brk{};
63 bool ignore{};
64};
45 65
46 bool operator==(const Branch& b) const { 66struct CaseBranch {
47 return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address); 67 CaseBranch(u32 cmp_value, u32 address) : cmp_value{cmp_value}, address{address} {}
48 } 68 u32 cmp_value;
69 u32 address;
70};
71
72class MultiBranch {
73public:
74 MultiBranch(u32 gpr, std::vector<CaseBranch>&& branches)
75 : gpr{gpr}, branches{std::move(branches)} {}
76
77 u32 gpr{};
78 std::vector<CaseBranch> branches{};
79};
80
81using BranchData = std::variant<SingleBranch, MultiBranch>;
82using BlockBranchInfo = std::shared_ptr<BranchData>;
49 83
50 bool operator!=(const Branch& b) const { 84bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second);
51 return !operator==(b);
52 }
53 };
54 85
86struct ShaderBlock {
55 u32 start{}; 87 u32 start{};
56 u32 end{}; 88 u32 end{};
57 bool ignore_branch{}; 89 bool ignore_branch{};
58 Branch branch{}; 90 BlockBranchInfo branch{};
59 91
60 bool operator==(const ShaderBlock& sb) const { 92 bool operator==(const ShaderBlock& sb) const {
61 return std::tie(start, end, ignore_branch, branch) == 93 return std::tie(start, end, ignore_branch) ==
62 std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch); 94 std::tie(sb.start, sb.end, sb.ignore_branch) &&
95 BlockBranchInfoAreEqual(branch, sb.branch);
63 } 96 }
64 97
65 bool operator!=(const ShaderBlock& sb) const { 98 bool operator!=(const ShaderBlock& sb) const {
@@ -76,8 +109,8 @@ struct ShaderCharacteristics {
76 CompilerSettings settings{}; 109 CompilerSettings settings{};
77}; 110};
78 111
79std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, 112std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
80 std::size_t program_size, u32 start_address, 113 const CompilerSettings& settings,
81 const CompilerSettings& settings); 114 ConstBufferLocker& locker);
82 115
83} // namespace VideoCommon::Shader 116} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 2626b1616..21fb9cb83 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -33,7 +33,7 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
33 return (absolute_offset % SchedPeriod) == 0; 33 return (absolute_offset % SchedPeriod) == 0;
34} 34}
35 35
36} // namespace 36} // Anonymous namespace
37 37
38class ASTDecoder { 38class ASTDecoder {
39public: 39public:
@@ -102,7 +102,7 @@ void ShaderIR::Decode() {
102 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); 102 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
103 103
104 decompiled = false; 104 decompiled = false;
105 auto info = ScanFlow(program_code, program_size, main_offset, settings); 105 auto info = ScanFlow(program_code, main_offset, settings, locker);
106 auto& shader_info = *info; 106 auto& shader_info = *info;
107 coverage_begin = shader_info.start; 107 coverage_begin = shader_info.start;
108 coverage_end = shader_info.end; 108 coverage_end = shader_info.end;
@@ -155,7 +155,7 @@ void ShaderIR::Decode() {
155 [[fallthrough]]; 155 [[fallthrough]];
156 case CompileDepth::BruteForce: { 156 case CompileDepth::BruteForce: {
157 coverage_begin = main_offset; 157 coverage_begin = main_offset;
158 const u32 shader_end = static_cast<u32>(program_size / sizeof(u64)); 158 const std::size_t shader_end = program_code.size();
159 coverage_end = shader_end; 159 coverage_end = shader_end;
160 for (u32 label = main_offset; label < shader_end; label++) { 160 for (u32 label = main_offset; label < shader_end; label++) {
161 basic_blocks.insert({label, DecodeRange(label, label + 1)}); 161 basic_blocks.insert({label, DecodeRange(label, label + 1)});
@@ -198,24 +198,39 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
198 } 198 }
199 return result; 199 return result;
200 }; 200 };
201 if (block.branch.address < 0) { 201 if (std::holds_alternative<SingleBranch>(*block.branch)) {
202 if (block.branch.kills) { 202 auto branch = std::get_if<SingleBranch>(block.branch.get());
203 Node n = Operation(OperationCode::Discard); 203 if (branch->address < 0) {
204 n = apply_conditions(block.branch.cond, n); 204 if (branch->kill) {
205 Node n = Operation(OperationCode::Discard);
206 n = apply_conditions(branch->condition, n);
207 bb.push_back(n);
208 global_code.push_back(n);
209 return;
210 }
211 Node n = Operation(OperationCode::Exit);
212 n = apply_conditions(branch->condition, n);
205 bb.push_back(n); 213 bb.push_back(n);
206 global_code.push_back(n); 214 global_code.push_back(n);
207 return; 215 return;
208 } 216 }
209 Node n = Operation(OperationCode::Exit); 217 Node n = Operation(OperationCode::Branch, Immediate(branch->address));
210 n = apply_conditions(block.branch.cond, n); 218 n = apply_conditions(branch->condition, n);
211 bb.push_back(n); 219 bb.push_back(n);
212 global_code.push_back(n); 220 global_code.push_back(n);
213 return; 221 return;
214 } 222 }
215 Node n = Operation(OperationCode::Branch, Immediate(block.branch.address)); 223 auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
216 n = apply_conditions(block.branch.cond, n); 224 Node op_a = GetRegister(multi_branch->gpr);
217 bb.push_back(n); 225 for (auto& branch_case : multi_branch->branches) {
218 global_code.push_back(n); 226 Node n = Operation(OperationCode::Branch, Immediate(branch_case.address));
227 Node op_b = Immediate(branch_case.cmp_value);
228 Node condition =
229 GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b);
230 auto result = Conditional(condition, {n});
231 bb.push_back(result);
232 global_code.push_back(result);
233 }
219} 234}
220 235
221u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { 236u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 295445498..d61e656b7 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -141,7 +141,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
141 const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); 141 const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
142 142
143 const auto& sampler = 143 const auto& sampler =
144 GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); 144 GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}});
145 145
146 Node4 values; 146 Node4 values;
147 for (u32 element = 0; element < values.size(); ++element) { 147 for (u32 element = 0; element < values.size(); ++element) {
@@ -165,10 +165,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
165 // Sadly, not all texture instructions specify the type of texture their sampler 165 // Sadly, not all texture instructions specify the type of texture their sampler
166 // uses. This must be fixed at a later instance. 166 // uses. This must be fixed at a later instance.
167 const auto& sampler = 167 const auto& sampler =
168 is_bindless 168 is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {});
169 ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false,
170 false)
171 : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
172 169
173 u32 indexer = 0; 170 u32 indexer = 0;
174 switch (instr.txq.query_type) { 171 switch (instr.txq.query_type) {
@@ -207,9 +204,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
207 204
208 auto texture_type = instr.tmml.texture_type.Value(); 205 auto texture_type = instr.tmml.texture_type.Value();
209 const bool is_array = instr.tmml.array != 0; 206 const bool is_array = instr.tmml.array != 0;
210 const auto& sampler = is_bindless 207 const auto& sampler =
211 ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false) 208 is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}})
212 : GetSampler(instr.sampler, texture_type, is_array, false); 209 : GetSampler(instr.sampler, {{texture_type, is_array, false}});
213 210
214 std::vector<Node> coords; 211 std::vector<Node> coords;
215 212
@@ -285,9 +282,26 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
285 return pc; 282 return pc;
286} 283}
287 284
288const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, 285const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
289 bool is_array, bool is_shadow) { 286 std::optional<SamplerInfo> sampler_info) {
290 const auto offset = static_cast<std::size_t>(sampler.index.Value()); 287 const auto offset = static_cast<u32>(sampler.index.Value());
288
289 Tegra::Shader::TextureType type;
290 bool is_array;
291 bool is_shadow;
292 if (sampler_info) {
293 type = sampler_info->type;
294 is_array = sampler_info->is_array;
295 is_shadow = sampler_info->is_shadow;
296 } else if (auto sampler = locker.ObtainBoundSampler(offset); sampler) {
297 type = sampler->texture_type.Value();
298 is_array = sampler->is_array.Value() != 0;
299 is_shadow = sampler->is_shadow.Value() != 0;
300 } else {
301 type = Tegra::Shader::TextureType::Texture2D;
302 is_array = false;
303 is_shadow = false;
304 }
291 305
292 // If this sampler has already been used, return the existing mapping. 306 // If this sampler has already been used, return the existing mapping.
293 const auto itr = 307 const auto itr =
@@ -303,15 +317,31 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
303 const std::size_t next_index = used_samplers.size(); 317 const std::size_t next_index = used_samplers.size();
304 const Sampler entry{offset, next_index, type, is_array, is_shadow}; 318 const Sampler entry{offset, next_index, type, is_array, is_shadow};
305 return *used_samplers.emplace(entry).first; 319 return *used_samplers.emplace(entry).first;
306} 320} // namespace VideoCommon::Shader
307 321
308const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, 322const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg,
309 bool is_array, bool is_shadow) { 323 std::optional<SamplerInfo> sampler_info) {
310 const Node sampler_register = GetRegister(reg); 324 const Node sampler_register = GetRegister(reg);
311 const auto [base_sampler, cbuf_index, cbuf_offset] = 325 const auto [base_sampler, cbuf_index, cbuf_offset] =
312 TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); 326 TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
313 ASSERT(base_sampler != nullptr); 327 ASSERT(base_sampler != nullptr);
314 const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); 328 const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset);
329 Tegra::Shader::TextureType type;
330 bool is_array;
331 bool is_shadow;
332 if (sampler_info) {
333 type = sampler_info->type;
334 is_array = sampler_info->is_array;
335 is_shadow = sampler_info->is_shadow;
336 } else if (auto sampler = locker.ObtainBindlessSampler(cbuf_index, cbuf_offset); sampler) {
337 type = sampler->texture_type.Value();
338 is_array = sampler->is_array.Value() != 0;
339 is_shadow = sampler->is_shadow.Value() != 0;
340 } else {
341 type = Tegra::Shader::TextureType::Texture2D;
342 is_array = false;
343 is_shadow = false;
344 }
315 345
316 // If this sampler has already been used, return the existing mapping. 346 // If this sampler has already been used, return the existing mapping.
317 const auto itr = 347 const auto itr =
@@ -411,9 +441,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
411 (texture_type == TextureType::TextureCube && is_array && is_shadow), 441 (texture_type == TextureType::TextureCube && is_array && is_shadow),
412 "This method is not supported."); 442 "This method is not supported.");
413 443
414 const auto& sampler = is_bindless 444 const auto& sampler =
415 ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow) 445 is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}})
416 : GetSampler(instr.sampler, texture_type, is_array, is_shadow); 446 : GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}});
417 447
418 const bool lod_needed = process_mode == TextureProcessMode::LZ || 448 const bool lod_needed = process_mode == TextureProcessMode::LZ ||
419 process_mode == TextureProcessMode::LL || 449 process_mode == TextureProcessMode::LL ||
@@ -577,7 +607,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
577 dc = GetRegister(parameter_register++); 607 dc = GetRegister(parameter_register++);
578 } 608 }
579 609
580 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); 610 const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}});
581 611
582 Node4 values; 612 Node4 values;
583 for (u32 element = 0; element < values.size(); ++element) { 613 for (u32 element = 0; element < values.size(); ++element) {
@@ -610,7 +640,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
610 // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; 640 // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
611 // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; 641 // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
612 642
613 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); 643 const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}});
614 644
615 Node4 values; 645 Node4 values;
616 for (u32 element = 0; element < values.size(); ++element) { 646 for (u32 element = 0; element < values.size(); ++element) {
@@ -646,7 +676,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
646 // When lod is used always is in gpr20 676 // When lod is used always is in gpr20
647 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); 677 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
648 678
649 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); 679 const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}});
650 680
651 Node4 values; 681 Node4 values;
652 for (u32 element = 0; element < values.size(); ++element) { 682 for (u32 element = 0; element < values.size(); ++element) {
diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h
index d3dcd00ec..4e8264367 100644
--- a/src/video_core/shader/expr.h
+++ b/src/video_core/shader/expr.h
@@ -17,13 +17,14 @@ using Tegra::Shader::Pred;
17class ExprAnd; 17class ExprAnd;
18class ExprBoolean; 18class ExprBoolean;
19class ExprCondCode; 19class ExprCondCode;
20class ExprGprEqual;
20class ExprNot; 21class ExprNot;
21class ExprOr; 22class ExprOr;
22class ExprPredicate; 23class ExprPredicate;
23class ExprVar; 24class ExprVar;
24 25
25using ExprData = 26using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd,
26 std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd, ExprBoolean>; 27 ExprBoolean, ExprGprEqual>;
27using Expr = std::shared_ptr<ExprData>; 28using Expr = std::shared_ptr<ExprData>;
28 29
29class ExprAnd final { 30class ExprAnd final {
@@ -118,6 +119,22 @@ public:
118 bool value; 119 bool value;
119}; 120};
120 121
122class ExprGprEqual final {
123public:
124 ExprGprEqual(u32 gpr, u32 value) : gpr{gpr}, value{value} {}
125
126 bool operator==(const ExprGprEqual& b) const {
127 return gpr == b.gpr && value == b.value;
128 }
129
130 bool operator!=(const ExprGprEqual& b) const {
131 return !operator==(b);
132 }
133
134 u32 gpr;
135 u32 value;
136};
137
121template <typename T, typename... Args> 138template <typename T, typename... Args>
122Expr MakeExpr(Args&&... args) { 139Expr MakeExpr(Args&&... args) {
123 static_assert(std::is_convertible_v<T, ExprData>); 140 static_assert(std::is_convertible_v<T, ExprData>);
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index b10d376cb..1d9825c76 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -23,10 +23,9 @@ using Tegra::Shader::PredCondition;
23using Tegra::Shader::PredOperation; 23using Tegra::Shader::PredOperation;
24using Tegra::Shader::Register; 24using Tegra::Shader::Register;
25 25
26ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size, 26ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
27 CompilerSettings settings) 27 ConstBufferLocker& locker)
28 : program_code{program_code}, main_offset{main_offset}, program_size{size}, basic_blocks{}, 28 : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
29 program_manager{true, true}, settings{settings} {
30 Decode(); 29 Decode();
31} 30}
32 31
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 02ddf2a75..1fd44bde1 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -17,6 +17,7 @@
17#include "video_core/engines/shader_header.h" 17#include "video_core/engines/shader_header.h"
18#include "video_core/shader/ast.h" 18#include "video_core/shader/ast.h"
19#include "video_core/shader/compiler_settings.h" 19#include "video_core/shader/compiler_settings.h"
20#include "video_core/shader/const_buffer_locker.h"
20#include "video_core/shader/node.h" 21#include "video_core/shader/node.h"
21 22
22namespace VideoCommon::Shader { 23namespace VideoCommon::Shader {
@@ -66,8 +67,8 @@ struct GlobalMemoryUsage {
66 67
67class ShaderIR final { 68class ShaderIR final {
68public: 69public:
69 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size, 70 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
70 CompilerSettings settings); 71 ConstBufferLocker& locker);
71 ~ShaderIR(); 72 ~ShaderIR();
72 73
73 const std::map<u32, NodeBlock>& GetBasicBlocks() const { 74 const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -172,6 +173,13 @@ public:
172 173
173private: 174private:
174 friend class ASTDecoder; 175 friend class ASTDecoder;
176
177 struct SamplerInfo {
178 Tegra::Shader::TextureType type;
179 bool is_array;
180 bool is_shadow;
181 };
182
175 void Decode(); 183 void Decode();
176 184
177 NodeBlock DecodeRange(u32 begin, u32 end); 185 NodeBlock DecodeRange(u32 begin, u32 end);
@@ -296,12 +304,11 @@ private:
296 304
297 /// Accesses a texture sampler 305 /// Accesses a texture sampler
298 const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, 306 const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
299 Tegra::Shader::TextureType type, bool is_array, bool is_shadow); 307 std::optional<SamplerInfo> sampler_info);
300 308
301 // Accesses a texture sampler for a bindless texture. 309 // Accesses a texture sampler for a bindless texture.
302 const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, 310 const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg,
303 Tegra::Shader::TextureType type, bool is_array, 311 std::optional<SamplerInfo> sampler_info);
304 bool is_shadow);
305 312
306 /// Accesses an image. 313 /// Accesses an image.
307 Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); 314 Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
@@ -377,7 +384,9 @@ private:
377 384
378 const ProgramCode& program_code; 385 const ProgramCode& program_code;
379 const u32 main_offset; 386 const u32 main_offset;
380 const std::size_t program_size; 387 const CompilerSettings settings;
388 ConstBufferLocker& locker;
389
381 bool decompiled{}; 390 bool decompiled{};
382 bool disable_flow_stack{}; 391 bool disable_flow_stack{};
383 392
@@ -386,8 +395,7 @@ private:
386 395
387 std::map<u32, NodeBlock> basic_blocks; 396 std::map<u32, NodeBlock> basic_blocks;
388 NodeBlock global_code; 397 NodeBlock global_code;
389 ASTManager program_manager; 398 ASTManager program_manager{true, true};
390 CompilerSettings settings{};
391 399
392 std::set<u32> used_registers; 400 std::set<u32> used_registers;
393 std::set<Tegra::Shader::Pred> used_predicates; 401 std::set<Tegra::Shader::Pred> used_predicates;