summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/common/CMakeLists.txt8
-rw-r--r--src/video_core/CMakeLists.txt8
-rw-r--r--src/video_core/engines/const_buffer_engine_interface.h67
-rw-r--r--src/video_core/engines/kepler_compute.cpp2
-rw-r--r--src/video_core/engines/maxwell_3d.cpp2
-rw-r--r--src/video_core/engines/maxwell_3d.h45
-rw-r--r--src/video_core/guest_driver.cpp7
-rw-r--r--src/video_core/guest_driver.h21
-rw-r--r--src/video_core/rasterizer_interface.h1
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp111
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h23
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp510
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h99
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp394
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h22
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp404
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h153
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp109
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h34
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp47
-rw-r--r--src/video_core/renderer_vulkan/vk_device.h45
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp21
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h8
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp42
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp138
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.h13
-rw-r--r--src/video_core/shader/const_buffer_locker.cpp126
-rw-r--r--src/video_core/shader/const_buffer_locker.h103
-rw-r--r--src/video_core/shader/control_flow.cpp13
-rw-r--r--src/video_core/shader/control_flow.h3
-rw-r--r--src/video_core/shader/decode.cpp22
-rw-r--r--src/video_core/shader/decode/texture.cpp5
-rw-r--r--src/video_core/shader/registry.cpp161
-rw-r--r--src/video_core/shader/registry.h137
-rw-r--r--src/video_core/shader/shader_ir.cpp5
-rw-r--r--src/video_core/shader/shader_ir.h6
-rw-r--r--src/video_core/shader/track.cpp18
-rw-r--r--src/video_core/shader/transform_feedback.cpp115
-rw-r--r--src/video_core/shader/transform_feedback.h23
-rw-r--r--src/video_core/texture_cache/surface_params.cpp6
-rw-r--r--src/video_core/texture_cache/texture_cache.h41
-rw-r--r--src/yuzu/loading_screen.cpp17
43 files changed, 1605 insertions, 1534 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 9afc6105d..fbebed715 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -38,8 +38,6 @@ add_custom_command(OUTPUT scm_rev.cpp
38 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" 38 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
39 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" 39 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
40 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" 40 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
41 "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp"
42 "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
43 "${VIDEO_CORE}/shader/decode/arithmetic.cpp" 41 "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
44 "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" 42 "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
45 "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" 43 "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
@@ -72,8 +70,6 @@ add_custom_command(OUTPUT scm_rev.cpp
72 "${VIDEO_CORE}/shader/ast.h" 70 "${VIDEO_CORE}/shader/ast.h"
73 "${VIDEO_CORE}/shader/compiler_settings.cpp" 71 "${VIDEO_CORE}/shader/compiler_settings.cpp"
74 "${VIDEO_CORE}/shader/compiler_settings.h" 72 "${VIDEO_CORE}/shader/compiler_settings.h"
75 "${VIDEO_CORE}/shader/const_buffer_locker.cpp"
76 "${VIDEO_CORE}/shader/const_buffer_locker.h"
77 "${VIDEO_CORE}/shader/control_flow.cpp" 73 "${VIDEO_CORE}/shader/control_flow.cpp"
78 "${VIDEO_CORE}/shader/control_flow.h" 74 "${VIDEO_CORE}/shader/control_flow.h"
79 "${VIDEO_CORE}/shader/decode.cpp" 75 "${VIDEO_CORE}/shader/decode.cpp"
@@ -82,9 +78,13 @@ add_custom_command(OUTPUT scm_rev.cpp
82 "${VIDEO_CORE}/shader/node.h" 78 "${VIDEO_CORE}/shader/node.h"
83 "${VIDEO_CORE}/shader/node_helper.cpp" 79 "${VIDEO_CORE}/shader/node_helper.cpp"
84 "${VIDEO_CORE}/shader/node_helper.h" 80 "${VIDEO_CORE}/shader/node_helper.h"
81 "${VIDEO_CORE}/shader/registry.cpp"
82 "${VIDEO_CORE}/shader/registry.h"
85 "${VIDEO_CORE}/shader/shader_ir.cpp" 83 "${VIDEO_CORE}/shader/shader_ir.cpp"
86 "${VIDEO_CORE}/shader/shader_ir.h" 84 "${VIDEO_CORE}/shader/shader_ir.h"
87 "${VIDEO_CORE}/shader/track.cpp" 85 "${VIDEO_CORE}/shader/track.cpp"
86 "${VIDEO_CORE}/shader/transform_feedback.cpp"
87 "${VIDEO_CORE}/shader/transform_feedback.h"
88 # and also check that the scm_rev files haven't changed 88 # and also check that the scm_rev files haven't changed
89 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" 89 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in"
90 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" 90 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h"
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 14f3b4569..91df062d7 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -65,8 +65,6 @@ add_library(video_core STATIC
65 renderer_opengl/gl_shader_decompiler.h 65 renderer_opengl/gl_shader_decompiler.h
66 renderer_opengl/gl_shader_disk_cache.cpp 66 renderer_opengl/gl_shader_disk_cache.cpp
67 renderer_opengl/gl_shader_disk_cache.h 67 renderer_opengl/gl_shader_disk_cache.h
68 renderer_opengl/gl_shader_gen.cpp
69 renderer_opengl/gl_shader_gen.h
70 renderer_opengl/gl_shader_manager.cpp 68 renderer_opengl/gl_shader_manager.cpp
71 renderer_opengl/gl_shader_manager.h 69 renderer_opengl/gl_shader_manager.h
72 renderer_opengl/gl_shader_util.cpp 70 renderer_opengl/gl_shader_util.cpp
@@ -118,8 +116,6 @@ add_library(video_core STATIC
118 shader/ast.h 116 shader/ast.h
119 shader/compiler_settings.cpp 117 shader/compiler_settings.cpp
120 shader/compiler_settings.h 118 shader/compiler_settings.h
121 shader/const_buffer_locker.cpp
122 shader/const_buffer_locker.h
123 shader/control_flow.cpp 119 shader/control_flow.cpp
124 shader/control_flow.h 120 shader/control_flow.h
125 shader/decode.cpp 121 shader/decode.cpp
@@ -128,9 +124,13 @@ add_library(video_core STATIC
128 shader/node_helper.cpp 124 shader/node_helper.cpp
129 shader/node_helper.h 125 shader/node_helper.h
130 shader/node.h 126 shader/node.h
127 shader/registry.cpp
128 shader/registry.h
131 shader/shader_ir.cpp 129 shader/shader_ir.cpp
132 shader/shader_ir.h 130 shader/shader_ir.h
133 shader/track.cpp 131 shader/track.cpp
132 shader/transform_feedback.cpp
133 shader/transform_feedback.h
134 surface.cpp 134 surface.cpp
135 surface.h 135 surface.h
136 texture_cache/format_lookup_table.cpp 136 texture_cache/format_lookup_table.cpp
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
index d56a47710..724ee0fd6 100644
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -16,11 +16,12 @@ namespace Tegra::Engines {
16 16
17struct SamplerDescriptor { 17struct SamplerDescriptor {
18 union { 18 union {
19 BitField<0, 20, Tegra::Shader::TextureType> texture_type; 19 u32 raw = 0;
20 BitField<20, 1, u32> is_array; 20 BitField<0, 2, Tegra::Shader::TextureType> texture_type;
21 BitField<21, 1, u32> is_buffer; 21 BitField<2, 3, Tegra::Texture::ComponentType> component_type;
22 BitField<22, 1, u32> is_shadow; 22 BitField<5, 1, u32> is_array;
23 u32 raw{}; 23 BitField<6, 1, u32> is_buffer;
24 BitField<7, 1, u32> is_shadow;
24 }; 25 };
25 26
26 bool operator==(const SamplerDescriptor& rhs) const noexcept { 27 bool operator==(const SamplerDescriptor& rhs) const noexcept {
@@ -31,68 +32,48 @@ struct SamplerDescriptor {
31 return !operator==(rhs); 32 return !operator==(rhs);
32 } 33 }
33 34
34 static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) { 35 static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) {
36 using Tegra::Shader::TextureType;
35 SamplerDescriptor result; 37 SamplerDescriptor result;
36 switch (tic_texture_type) { 38
39 // This is going to be used to determine the shading language type.
40 // Because of that we don't care about all component types on color textures.
41 result.component_type.Assign(tic.r_type.Value());
42
43 switch (tic.texture_type.Value()) {
37 case Tegra::Texture::TextureType::Texture1D: 44 case Tegra::Texture::TextureType::Texture1D:
38 result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); 45 result.texture_type.Assign(TextureType::Texture1D);
39 result.is_array.Assign(0);
40 result.is_buffer.Assign(0);
41 result.is_shadow.Assign(0);
42 return result; 46 return result;
43 case Tegra::Texture::TextureType::Texture2D: 47 case Tegra::Texture::TextureType::Texture2D:
44 result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); 48 result.texture_type.Assign(TextureType::Texture2D);
45 result.is_array.Assign(0);
46 result.is_buffer.Assign(0);
47 result.is_shadow.Assign(0);
48 return result; 49 return result;
49 case Tegra::Texture::TextureType::Texture3D: 50 case Tegra::Texture::TextureType::Texture3D:
50 result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D); 51 result.texture_type.Assign(TextureType::Texture3D);
51 result.is_array.Assign(0);
52 result.is_buffer.Assign(0);
53 result.is_shadow.Assign(0);
54 return result; 52 return result;
55 case Tegra::Texture::TextureType::TextureCubemap: 53 case Tegra::Texture::TextureType::TextureCubemap:
56 result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); 54 result.texture_type.Assign(TextureType::TextureCube);
57 result.is_array.Assign(0);
58 result.is_buffer.Assign(0);
59 result.is_shadow.Assign(0);
60 return result; 55 return result;
61 case Tegra::Texture::TextureType::Texture1DArray: 56 case Tegra::Texture::TextureType::Texture1DArray:
62 result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); 57 result.texture_type.Assign(TextureType::Texture1D);
63 result.is_array.Assign(1); 58 result.is_array.Assign(1);
64 result.is_buffer.Assign(0);
65 result.is_shadow.Assign(0);
66 return result; 59 return result;
67 case Tegra::Texture::TextureType::Texture2DArray: 60 case Tegra::Texture::TextureType::Texture2DArray:
68 result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); 61 result.texture_type.Assign(TextureType::Texture2D);
69 result.is_array.Assign(1); 62 result.is_array.Assign(1);
70 result.is_buffer.Assign(0);
71 result.is_shadow.Assign(0);
72 return result; 63 return result;
73 case Tegra::Texture::TextureType::Texture1DBuffer: 64 case Tegra::Texture::TextureType::Texture1DBuffer:
74 result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); 65 result.texture_type.Assign(TextureType::Texture1D);
75 result.is_array.Assign(0);
76 result.is_buffer.Assign(1); 66 result.is_buffer.Assign(1);
77 result.is_shadow.Assign(0);
78 return result; 67 return result;
79 case Tegra::Texture::TextureType::Texture2DNoMipmap: 68 case Tegra::Texture::TextureType::Texture2DNoMipmap:
80 result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); 69 result.texture_type.Assign(TextureType::Texture2D);
81 result.is_array.Assign(0);
82 result.is_buffer.Assign(0);
83 result.is_shadow.Assign(0);
84 return result; 70 return result;
85 case Tegra::Texture::TextureType::TextureCubeArray: 71 case Tegra::Texture::TextureType::TextureCubeArray:
86 result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); 72 result.texture_type.Assign(TextureType::TextureCube);
87 result.is_array.Assign(1); 73 result.is_array.Assign(1);
88 result.is_buffer.Assign(0);
89 result.is_shadow.Assign(0);
90 return result; 74 return result;
91 default: 75 default:
92 result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); 76 result.texture_type.Assign(TextureType::Texture2D);
93 result.is_array.Assign(0);
94 result.is_buffer.Assign(0);
95 result.is_shadow.Assign(0);
96 return result; 77 return result;
97 } 78 }
98 } 79 }
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index ae52afa79..1ecd65925 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -89,7 +89,7 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
89 89
90 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; 90 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
91 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); 91 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
92 SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); 92 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
93 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); 93 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
94 return result; 94 return result;
95} 95}
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 89050361e..ce536e29b 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -638,7 +638,7 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
638 638
639 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; 639 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
640 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); 640 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
641 SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); 641 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
642 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); 642 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
643 return result; 643 return result;
644} 644}
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index ed7fc8fdd..8a9e9992e 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -67,6 +67,7 @@ public:
67 static constexpr std::size_t NumVaryings = 31; 67 static constexpr std::size_t NumVaryings = 31;
68 static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number 68 static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number
69 static constexpr std::size_t NumClipDistances = 8; 69 static constexpr std::size_t NumClipDistances = 8;
70 static constexpr std::size_t NumTransformFeedbackBuffers = 4;
70 static constexpr std::size_t MaxShaderProgram = 6; 71 static constexpr std::size_t MaxShaderProgram = 6;
71 static constexpr std::size_t MaxShaderStage = 5; 72 static constexpr std::size_t MaxShaderStage = 5;
72 // Maximum number of const buffers per shader stage. 73 // Maximum number of const buffers per shader stage.
@@ -627,6 +628,29 @@ public:
627 float depth_range_far; 628 float depth_range_far;
628 }; 629 };
629 630
631 struct TransformFeedbackBinding {
632 u32 buffer_enable;
633 u32 address_high;
634 u32 address_low;
635 s32 buffer_size;
636 s32 buffer_offset;
637 INSERT_UNION_PADDING_WORDS(3);
638
639 GPUVAddr Address() const {
640 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
641 address_low);
642 }
643 };
644 static_assert(sizeof(TransformFeedbackBinding) == 32);
645
646 struct TransformFeedbackLayout {
647 u32 stream;
648 u32 varying_count;
649 u32 stride;
650 INSERT_UNION_PADDING_WORDS(1);
651 };
652 static_assert(sizeof(TransformFeedbackLayout) == 16);
653
630 bool IsShaderConfigEnabled(std::size_t index) const { 654 bool IsShaderConfigEnabled(std::size_t index) const {
631 // The VertexB is always enabled. 655 // The VertexB is always enabled.
632 if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) { 656 if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) {
@@ -635,6 +659,10 @@ public:
635 return shader_config[index].enable != 0; 659 return shader_config[index].enable != 0;
636 } 660 }
637 661
662 bool IsShaderConfigEnabled(Regs::ShaderProgram type) const {
663 return IsShaderConfigEnabled(static_cast<std::size_t>(type));
664 }
665
638 union { 666 union {
639 struct { 667 struct {
640 INSERT_UNION_PADDING_WORDS(0x45); 668 INSERT_UNION_PADDING_WORDS(0x45);
@@ -683,7 +711,13 @@ public:
683 711
684 u32 rasterize_enable; 712 u32 rasterize_enable;
685 713
686 INSERT_UNION_PADDING_WORDS(0xF1); 714 std::array<TransformFeedbackBinding, NumTransformFeedbackBuffers> tfb_bindings;
715
716 INSERT_UNION_PADDING_WORDS(0xC0);
717
718 std::array<TransformFeedbackLayout, NumTransformFeedbackBuffers> tfb_layouts;
719
720 INSERT_UNION_PADDING_WORDS(0x1);
687 721
688 u32 tfb_enabled; 722 u32 tfb_enabled;
689 723
@@ -1202,7 +1236,11 @@ public:
1202 1236
1203 u32 tex_cb_index; 1237 u32 tex_cb_index;
1204 1238
1205 INSERT_UNION_PADDING_WORDS(0x395); 1239 INSERT_UNION_PADDING_WORDS(0x7D);
1240
1241 std::array<std::array<u8, 128>, NumTransformFeedbackBuffers> tfb_varying_locs;
1242
1243 INSERT_UNION_PADDING_WORDS(0x298);
1206 1244
1207 struct { 1245 struct {
1208 /// Compressed address of a buffer that holds information about bound SSBOs. 1246 /// Compressed address of a buffer that holds information about bound SSBOs.
@@ -1428,6 +1466,8 @@ ASSERT_REG_POSITION(tess_mode, 0xC8);
1428ASSERT_REG_POSITION(tess_level_outer, 0xC9); 1466ASSERT_REG_POSITION(tess_level_outer, 0xC9);
1429ASSERT_REG_POSITION(tess_level_inner, 0xCD); 1467ASSERT_REG_POSITION(tess_level_inner, 0xCD);
1430ASSERT_REG_POSITION(rasterize_enable, 0xDF); 1468ASSERT_REG_POSITION(rasterize_enable, 0xDF);
1469ASSERT_REG_POSITION(tfb_bindings, 0xE0);
1470ASSERT_REG_POSITION(tfb_layouts, 0x1C0);
1431ASSERT_REG_POSITION(tfb_enabled, 0x1D1); 1471ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
1432ASSERT_REG_POSITION(rt, 0x200); 1472ASSERT_REG_POSITION(rt, 0x200);
1433ASSERT_REG_POSITION(viewport_transform, 0x280); 1473ASSERT_REG_POSITION(viewport_transform, 0x280);
@@ -1526,6 +1566,7 @@ ASSERT_REG_POSITION(firmware, 0x8C0);
1526ASSERT_REG_POSITION(const_buffer, 0x8E0); 1566ASSERT_REG_POSITION(const_buffer, 0x8E0);
1527ASSERT_REG_POSITION(cb_bind[0], 0x904); 1567ASSERT_REG_POSITION(cb_bind[0], 0x904);
1528ASSERT_REG_POSITION(tex_cb_index, 0x982); 1568ASSERT_REG_POSITION(tex_cb_index, 0x982);
1569ASSERT_REG_POSITION(tfb_varying_locs, 0xA00);
1529ASSERT_REG_POSITION(ssbo_info, 0xD18); 1570ASSERT_REG_POSITION(ssbo_info, 0xD18);
1530ASSERT_REG_POSITION(tex_info_buffers.address[0], 0xD2A); 1571ASSERT_REG_POSITION(tex_info_buffers.address[0], 0xD2A);
1531ASSERT_REG_POSITION(tex_info_buffers.size[0], 0xD2F); 1572ASSERT_REG_POSITION(tex_info_buffers.size[0], 0xD2F);
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp
index 6adef459e..f058f2744 100644
--- a/src/video_core/guest_driver.cpp
+++ b/src/video_core/guest_driver.cpp
@@ -4,13 +4,15 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <limits> 6#include <limits>
7#include <vector>
7 8
9#include "common/common_types.h"
8#include "video_core/guest_driver.h" 10#include "video_core/guest_driver.h"
9 11
10namespace VideoCore { 12namespace VideoCore {
11 13
12void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) { 14void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) {
13 if (texture_handler_size_deduced) { 15 if (texture_handler_size) {
14 return; 16 return;
15 } 17 }
16 const std::size_t size = bound_offsets.size(); 18 const std::size_t size = bound_offsets.size();
@@ -29,7 +31,6 @@ void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offse
29 if (min_val > 2) { 31 if (min_val > 2) {
30 return; 32 return;
31 } 33 }
32 texture_handler_size_deduced = true;
33 texture_handler_size = min_texture_handler_size * min_val; 34 texture_handler_size = min_texture_handler_size * min_val;
34} 35}
35 36
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
index fc1917347..99450777e 100644
--- a/src/video_core/guest_driver.h
+++ b/src/video_core/guest_driver.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <optional>
7#include <vector> 8#include <vector>
8 9
9#include "common/common_types.h" 10#include "common/common_types.h"
@@ -17,25 +18,29 @@ namespace VideoCore {
17 */ 18 */
18class GuestDriverProfile { 19class GuestDriverProfile {
19public: 20public:
20 void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets); 21 explicit GuestDriverProfile() = default;
22 explicit GuestDriverProfile(std::optional<u32> texture_handler_size)
23 : texture_handler_size{texture_handler_size} {}
24
25 void DeduceTextureHandlerSize(std::vector<u32> bound_offsets);
21 26
22 u32 GetTextureHandlerSize() const { 27 u32 GetTextureHandlerSize() const {
23 return texture_handler_size; 28 return texture_handler_size.value_or(default_texture_handler_size);
24 } 29 }
25 30
26 bool TextureHandlerSizeKnown() const { 31 bool IsTextureHandlerSizeKnown() const {
27 return texture_handler_size_deduced; 32 return texture_handler_size.has_value();
28 } 33 }
29 34
30private: 35private:
31 // Minimum size of texture handler any driver can use. 36 // Minimum size of texture handler any driver can use.
32 static constexpr u32 min_texture_handler_size = 4; 37 static constexpr u32 min_texture_handler_size = 4;
33 // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily 38
34 // use 4 bytes instead. Thus, certain drivers may squish the size. 39 // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead.
40 // Thus, certain drivers may squish the size.
35 static constexpr u32 default_texture_handler_size = 8; 41 static constexpr u32 default_texture_handler_size = 8;
36 42
37 u32 texture_handler_size = default_texture_handler_size; 43 std::optional<u32> texture_handler_size = default_texture_handler_size;
38 bool texture_handler_size_deduced = false;
39}; 44};
40 45
41} // namespace VideoCore 46} // namespace VideoCore
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 3e4514b94..1a68e3caa 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -25,7 +25,6 @@ constexpr std::size_t NumQueryTypes = 1;
25 25
26enum class LoadCallbackStage { 26enum class LoadCallbackStage {
27 Prepare, 27 Prepare,
28 Decompile,
29 Build, 28 Build,
30 Complete, 29 Complete,
31}; 30};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 8e48a6482..1af4268a4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -28,7 +28,6 @@
28#include "video_core/renderer_opengl/gl_query_cache.h" 28#include "video_core/renderer_opengl/gl_query_cache.h"
29#include "video_core/renderer_opengl/gl_rasterizer.h" 29#include "video_core/renderer_opengl/gl_rasterizer.h"
30#include "video_core/renderer_opengl/gl_shader_cache.h" 30#include "video_core/renderer_opengl/gl_shader_cache.h"
31#include "video_core/renderer_opengl/gl_shader_gen.h"
32#include "video_core/renderer_opengl/maxwell_to_gl.h" 31#include "video_core/renderer_opengl/maxwell_to_gl.h"
33#include "video_core/renderer_opengl/renderer_opengl.h" 32#include "video_core/renderer_opengl/renderer_opengl.h"
34 33
@@ -76,7 +75,7 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
76} 75}
77 76
78std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, 77std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
79 const GLShader::ConstBufferEntry& entry) { 78 const ConstBufferEntry& entry) {
80 if (!entry.IsIndirect()) { 79 if (!entry.IsIndirect()) {
81 return entry.GetSize(); 80 return entry.GetSize();
82 } 81 }
@@ -272,9 +271,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
272 SetupDrawTextures(stage, shader); 271 SetupDrawTextures(stage, shader);
273 SetupDrawImages(stage, shader); 272 SetupDrawImages(stage, shader);
274 273
275 const ProgramVariant variant(primitive_mode); 274 const GLuint program_handle = shader->GetHandle();
276 const auto program_handle = shader->GetHandle(variant);
277
278 switch (program) { 275 switch (program) {
279 case Maxwell::ShaderProgram::VertexA: 276 case Maxwell::ShaderProgram::VertexA:
280 case Maxwell::ShaderProgram::VertexB: 277 case Maxwell::ShaderProgram::VertexB:
@@ -295,7 +292,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
295 // When a clip distance is enabled but not set in the shader it crops parts of the screen 292 // When a clip distance is enabled but not set in the shader it crops parts of the screen
296 // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the 293 // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
297 // clip distances only when it's written by a shader stage. 294 // clip distances only when it's written by a shader stage.
298 clip_distances |= shader->GetShaderEntries().clip_distances; 295 clip_distances |= shader->GetEntries().clip_distances;
299 296
300 // When VertexA is enabled, we have dual vertex shaders 297 // When VertexA is enabled, we have dual vertex shaders
301 if (program == Maxwell::ShaderProgram::VertexA) { 298 if (program == Maxwell::ShaderProgram::VertexA) {
@@ -499,7 +496,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
499 SyncCullMode(); 496 SyncCullMode();
500 SyncPrimitiveRestart(); 497 SyncPrimitiveRestart();
501 SyncScissorTest(); 498 SyncScissorTest();
502 SyncTransformFeedback();
503 SyncPointState(); 499 SyncPointState();
504 SyncPolygonOffset(); 500 SyncPolygonOffset();
505 SyncAlphaTest(); 501 SyncAlphaTest();
@@ -572,7 +568,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
572 glTextureBarrier(); 568 glTextureBarrier();
573 } 569 }
574 570
575 ++num_queued_commands; 571 BeginTransformFeedback(primitive_mode);
576 572
577 const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance); 573 const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
578 const GLsizei num_instances = 574 const GLsizei num_instances =
@@ -611,6 +607,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
611 num_instances, base_instance); 607 num_instances, base_instance);
612 } 608 }
613 } 609 }
610
611 EndTransformFeedback();
612
613 ++num_queued_commands;
614} 614}
615 615
616void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { 616void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -623,12 +623,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
623 auto kernel = shader_cache.GetComputeKernel(code_addr); 623 auto kernel = shader_cache.GetComputeKernel(code_addr);
624 SetupComputeTextures(kernel); 624 SetupComputeTextures(kernel);
625 SetupComputeImages(kernel); 625 SetupComputeImages(kernel);
626 626 program_manager.BindComputeShader(kernel->GetHandle());
627 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
628 const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
629 launch_desc.block_dim_z, launch_desc.shared_alloc,
630 launch_desc.local_pos_alloc);
631 program_manager.BindComputeShader(kernel->GetHandle(variant));
632 627
633 const std::size_t buffer_size = 628 const std::size_t buffer_size =
634 Tegra::Engines::KeplerCompute::NumConstBuffers * 629 Tegra::Engines::KeplerCompute::NumConstBuffers *
@@ -646,6 +641,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
646 bind_ubo_pushbuffer.Bind(); 641 bind_ubo_pushbuffer.Bind();
647 bind_ssbo_pushbuffer.Bind(); 642 bind_ssbo_pushbuffer.Bind();
648 643
644 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
649 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); 645 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
650 ++num_queued_commands; 646 ++num_queued_commands;
651} 647}
@@ -750,7 +746,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad
750 const auto& shader_stage = stages[stage_index]; 746 const auto& shader_stage = stages[stage_index];
751 747
752 u32 binding = device.GetBaseBindings(stage_index).uniform_buffer; 748 u32 binding = device.GetBaseBindings(stage_index).uniform_buffer;
753 for (const auto& entry : shader->GetShaderEntries().const_buffers) { 749 for (const auto& entry : shader->GetEntries().const_buffers) {
754 const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; 750 const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
755 SetupConstBuffer(binding++, buffer, entry); 751 SetupConstBuffer(binding++, buffer, entry);
756 } 752 }
@@ -761,7 +757,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
761 const auto& launch_desc = system.GPU().KeplerCompute().launch_description; 757 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
762 758
763 u32 binding = 0; 759 u32 binding = 0;
764 for (const auto& entry : kernel->GetShaderEntries().const_buffers) { 760 for (const auto& entry : kernel->GetEntries().const_buffers) {
765 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; 761 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
766 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); 762 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
767 Tegra::Engines::ConstBufferInfo buffer; 763 Tegra::Engines::ConstBufferInfo buffer;
@@ -773,7 +769,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
773} 769}
774 770
775void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, 771void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
776 const GLShader::ConstBufferEntry& entry) { 772 const ConstBufferEntry& entry) {
777 if (!buffer.enabled) { 773 if (!buffer.enabled) {
778 // Set values to zero to unbind buffers 774 // Set values to zero to unbind buffers
779 bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, 775 bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
@@ -797,7 +793,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
797 const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; 793 const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
798 794
799 u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer; 795 u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer;
800 for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { 796 for (const auto& entry : shader->GetEntries().global_memory_entries) {
801 const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; 797 const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
802 const auto gpu_addr{memory_manager.Read<u64>(addr)}; 798 const auto gpu_addr{memory_manager.Read<u64>(addr)};
803 const auto size{memory_manager.Read<u32>(addr + 8)}; 799 const auto size{memory_manager.Read<u32>(addr + 8)};
@@ -811,7 +807,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
811 const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; 807 const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
812 808
813 u32 binding = 0; 809 u32 binding = 0;
814 for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) { 810 for (const auto& entry : kernel->GetEntries().global_memory_entries) {
815 const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; 811 const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
816 const auto gpu_addr{memory_manager.Read<u64>(addr)}; 812 const auto gpu_addr{memory_manager.Read<u64>(addr)};
817 const auto size{memory_manager.Read<u32>(addr + 8)}; 813 const auto size{memory_manager.Read<u32>(addr + 8)};
@@ -819,7 +815,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
819 } 815 }
820} 816}
821 817
822void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, 818void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
823 GPUVAddr gpu_addr, std::size_t size) { 819 GPUVAddr gpu_addr, std::size_t size) {
824 const auto alignment{device.GetShaderStorageBufferAlignment()}; 820 const auto alignment{device.GetShaderStorageBufferAlignment()};
825 const auto [ssbo, buffer_offset] = 821 const auto [ssbo, buffer_offset] =
@@ -831,7 +827,7 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
831 MICROPROFILE_SCOPE(OpenGL_Texture); 827 MICROPROFILE_SCOPE(OpenGL_Texture);
832 const auto& maxwell3d = system.GPU().Maxwell3D(); 828 const auto& maxwell3d = system.GPU().Maxwell3D();
833 u32 binding = device.GetBaseBindings(stage_index).sampler; 829 u32 binding = device.GetBaseBindings(stage_index).sampler;
834 for (const auto& entry : shader->GetShaderEntries().samplers) { 830 for (const auto& entry : shader->GetEntries().samplers) {
835 const auto shader_type = static_cast<ShaderType>(stage_index); 831 const auto shader_type = static_cast<ShaderType>(stage_index);
836 for (std::size_t i = 0; i < entry.Size(); ++i) { 832 for (std::size_t i = 0; i < entry.Size(); ++i) {
837 const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); 833 const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
@@ -844,7 +840,7 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
844 MICROPROFILE_SCOPE(OpenGL_Texture); 840 MICROPROFILE_SCOPE(OpenGL_Texture);
845 const auto& compute = system.GPU().KeplerCompute(); 841 const auto& compute = system.GPU().KeplerCompute();
846 u32 binding = 0; 842 u32 binding = 0;
847 for (const auto& entry : kernel->GetShaderEntries().samplers) { 843 for (const auto& entry : kernel->GetEntries().samplers) {
848 for (std::size_t i = 0; i < entry.Size(); ++i) { 844 for (std::size_t i = 0; i < entry.Size(); ++i) {
849 const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i); 845 const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i);
850 SetupTexture(binding++, texture, entry); 846 SetupTexture(binding++, texture, entry);
@@ -853,7 +849,7 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
853} 849}
854 850
855void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, 851void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
856 const GLShader::SamplerEntry& entry) { 852 const SamplerEntry& entry) {
857 const auto view = texture_cache.GetTextureSurface(texture.tic, entry); 853 const auto view = texture_cache.GetTextureSurface(texture.tic, entry);
858 if (!view) { 854 if (!view) {
859 // Can occur when texture addr is null or its memory is unmapped/invalid 855 // Can occur when texture addr is null or its memory is unmapped/invalid
@@ -876,7 +872,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
876void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { 872void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
877 const auto& maxwell3d = system.GPU().Maxwell3D(); 873 const auto& maxwell3d = system.GPU().Maxwell3D();
878 u32 binding = device.GetBaseBindings(stage_index).image; 874 u32 binding = device.GetBaseBindings(stage_index).image;
879 for (const auto& entry : shader->GetShaderEntries().images) { 875 for (const auto& entry : shader->GetEntries().images) {
880 const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); 876 const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
881 const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; 877 const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
882 SetupImage(binding++, tic, entry); 878 SetupImage(binding++, tic, entry);
@@ -886,14 +882,14 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh
886void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { 882void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
887 const auto& compute = system.GPU().KeplerCompute(); 883 const auto& compute = system.GPU().KeplerCompute();
888 u32 binding = 0; 884 u32 binding = 0;
889 for (const auto& entry : shader->GetShaderEntries().images) { 885 for (const auto& entry : shader->GetEntries().images) {
890 const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic; 886 const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic;
891 SetupImage(binding++, tic, entry); 887 SetupImage(binding++, tic, entry);
892 } 888 }
893} 889}
894 890
895void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, 891void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
896 const GLShader::ImageEntry& entry) { 892 const ImageEntry& entry) {
897 const auto view = texture_cache.GetImageSurface(tic, entry); 893 const auto view = texture_cache.GetImageSurface(tic, entry);
898 if (!view) { 894 if (!view) {
899 glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); 895 glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
@@ -1297,11 +1293,6 @@ void RasterizerOpenGL::SyncScissorTest() {
1297 } 1293 }
1298} 1294}
1299 1295
1300void RasterizerOpenGL::SyncTransformFeedback() {
1301 const auto& regs = system.GPU().Maxwell3D().regs;
1302 UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
1303}
1304
1305void RasterizerOpenGL::SyncPointState() { 1296void RasterizerOpenGL::SyncPointState() {
1306 auto& gpu = system.GPU().Maxwell3D(); 1297 auto& gpu = system.GPU().Maxwell3D();
1307 auto& flags = gpu.dirty.flags; 1298 auto& flags = gpu.dirty.flags;
@@ -1377,4 +1368,62 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
1377 oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); 1368 oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
1378} 1369}
1379 1370
1371void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
1372 const auto& regs = system.GPU().Maxwell3D().regs;
1373 if (regs.tfb_enabled == 0) {
1374 return;
1375 }
1376
1377 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
1378 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
1379 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
1380
1381 for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
1382 const auto& binding = regs.tfb_bindings[index];
1383 if (!binding.buffer_enable) {
1384 if (enabled_transform_feedback_buffers[index]) {
1385 glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
1386 0);
1387 }
1388 enabled_transform_feedback_buffers[index] = false;
1389 continue;
1390 }
1391 enabled_transform_feedback_buffers[index] = true;
1392
1393 auto& tfb_buffer = transform_feedback_buffers[index];
1394 tfb_buffer.Create();
1395
1396 const GLuint handle = tfb_buffer.handle;
1397 const std::size_t size = binding.buffer_size;
1398 glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
1399 glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
1400 static_cast<GLsizeiptr>(size));
1401 }
1402
1403 glBeginTransformFeedback(GL_POINTS);
1404}
1405
1406void RasterizerOpenGL::EndTransformFeedback() {
1407 const auto& regs = system.GPU().Maxwell3D().regs;
1408 if (regs.tfb_enabled == 0) {
1409 return;
1410 }
1411
1412 glEndTransformFeedback();
1413
1414 for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
1415 const auto& binding = regs.tfb_bindings[index];
1416 if (!binding.buffer_enable) {
1417 continue;
1418 }
1419 UNIMPLEMENTED_IF(binding.buffer_offset != 0);
1420
1421 const GLuint handle = transform_feedback_buffers[index].handle;
1422 const GPUVAddr gpu_addr = binding.Address();
1423 const std::size_t size = binding.buffer_size;
1424 const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
1425 glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast<GLsizeiptr>(size));
1426 }
1427}
1428
1380} // namespace OpenGL 1429} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index e5681d6df..2d3be2437 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -98,7 +98,7 @@ private:
98 98
99 /// Configures a constant buffer. 99 /// Configures a constant buffer.
100 void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, 100 void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
101 const GLShader::ConstBufferEntry& entry); 101 const ConstBufferEntry& entry);
102 102
103 /// Configures the current global memory entries to use for the draw command. 103 /// Configures the current global memory entries to use for the draw command.
104 void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); 104 void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);
@@ -107,7 +107,7 @@ private:
107 void SetupComputeGlobalMemory(const Shader& kernel); 107 void SetupComputeGlobalMemory(const Shader& kernel);
108 108
109 /// Configures a constant buffer. 109 /// Configures a constant buffer.
110 void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, 110 void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
111 std::size_t size); 111 std::size_t size);
112 112
113 /// Configures the current textures to use for the draw command. 113 /// Configures the current textures to use for the draw command.
@@ -118,7 +118,7 @@ private:
118 118
119 /// Configures a texture. 119 /// Configures a texture.
120 void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, 120 void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
121 const GLShader::SamplerEntry& entry); 121 const SamplerEntry& entry);
122 122
123 /// Configures images in a graphics shader. 123 /// Configures images in a graphics shader.
124 void SetupDrawImages(std::size_t stage_index, const Shader& shader); 124 void SetupDrawImages(std::size_t stage_index, const Shader& shader);
@@ -127,8 +127,7 @@ private:
127 void SetupComputeImages(const Shader& shader); 127 void SetupComputeImages(const Shader& shader);
128 128
129 /// Configures an image. 129 /// Configures an image.
130 void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, 130 void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
131 const GLShader::ImageEntry& entry);
132 131
133 /// Syncs the viewport and depth range to match the guest state 132 /// Syncs the viewport and depth range to match the guest state
134 void SyncViewport(); 133 void SyncViewport();
@@ -169,9 +168,6 @@ private:
169 /// Syncs the scissor test state to match the guest state 168 /// Syncs the scissor test state to match the guest state
170 void SyncScissorTest(); 169 void SyncScissorTest();
171 170
172 /// Syncs the transform feedback state to match the guest state
173 void SyncTransformFeedback();
174
175 /// Syncs the point state to match the guest state 171 /// Syncs the point state to match the guest state
176 void SyncPointState(); 172 void SyncPointState();
177 173
@@ -193,6 +189,12 @@ private:
193 /// Syncs the framebuffer sRGB state to match the guest state 189 /// Syncs the framebuffer sRGB state to match the guest state
194 void SyncFramebufferSRGB(); 190 void SyncFramebufferSRGB();
195 191
192 /// Begin a transform feedback
193 void BeginTransformFeedback(GLenum primitive_mode);
194
195 /// End a transform feedback
196 void EndTransformFeedback();
197
196 /// Check for extension that are not strictly required but are needed for correct emulation 198 /// Check for extension that are not strictly required but are needed for correct emulation
197 void CheckExtensions(); 199 void CheckExtensions();
198 200
@@ -230,6 +232,11 @@ private:
230 BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; 232 BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
231 BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; 233 BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
232 234
235 std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
236 transform_feedback_buffers;
237 std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
238 enabled_transform_feedback_buffers;
239
233 /// Number of commands queued to the OpenGL driver. Reseted on flush. 240 /// Number of commands queued to the OpenGL driver. Reseted on flush.
234 std::size_t num_queued_commands = 0; 241 std::size_t num_queued_commands = 0;
235 242
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 4cb89db8c..e3d31c3eb 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -2,12 +2,16 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <atomic>
6#include <functional>
5#include <mutex> 7#include <mutex>
6#include <optional> 8#include <optional>
7#include <string> 9#include <string>
8#include <thread> 10#include <thread>
9#include <unordered_set> 11#include <unordered_set>
12
10#include <boost/functional/hash.hpp> 13#include <boost/functional/hash.hpp>
14
11#include "common/alignment.h" 15#include "common/alignment.h"
12#include "common/assert.h" 16#include "common/assert.h"
13#include "common/logging/log.h" 17#include "common/logging/log.h"
@@ -24,13 +28,14 @@
24#include "video_core/renderer_opengl/gl_shader_disk_cache.h" 28#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
25#include "video_core/renderer_opengl/gl_state_tracker.h" 29#include "video_core/renderer_opengl/gl_state_tracker.h"
26#include "video_core/renderer_opengl/utils.h" 30#include "video_core/renderer_opengl/utils.h"
31#include "video_core/shader/registry.h"
27#include "video_core/shader/shader_ir.h" 32#include "video_core/shader/shader_ir.h"
28 33
29namespace OpenGL { 34namespace OpenGL {
30 35
31using Tegra::Engines::ShaderType; 36using Tegra::Engines::ShaderType;
32using VideoCommon::Shader::ConstBufferLocker;
33using VideoCommon::Shader::ProgramCode; 37using VideoCommon::Shader::ProgramCode;
38using VideoCommon::Shader::Registry;
34using VideoCommon::Shader::ShaderIR; 39using VideoCommon::Shader::ShaderIR;
35 40
36namespace { 41namespace {
@@ -56,7 +61,7 @@ constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
56} 61}
57 62
58/// Calculates the size of a program stream 63/// Calculates the size of a program stream
59std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { 64std::size_t CalculateProgramSize(const ProgramCode& program) {
60 constexpr std::size_t start_offset = 10; 65 constexpr std::size_t start_offset = 10;
61 // This is the encoded version of BRA that jumps to itself. All Nvidia 66 // This is the encoded version of BRA that jumps to itself. All Nvidia
62 // shaders end with one. 67 // shaders end with one.
@@ -109,32 +114,9 @@ constexpr GLenum GetGLShaderType(ShaderType shader_type) {
109 } 114 }
110} 115}
111 116
112/// Describes primitive behavior on geometry shaders
113constexpr std::pair<const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
114 switch (primitive_mode) {
115 case GL_POINTS:
116 return {"points", 1};
117 case GL_LINES:
118 case GL_LINE_STRIP:
119 return {"lines", 2};
120 case GL_LINES_ADJACENCY:
121 case GL_LINE_STRIP_ADJACENCY:
122 return {"lines_adjacency", 4};
123 case GL_TRIANGLES:
124 case GL_TRIANGLE_STRIP:
125 case GL_TRIANGLE_FAN:
126 return {"triangles", 3};
127 case GL_TRIANGLES_ADJACENCY:
128 case GL_TRIANGLE_STRIP_ADJACENCY:
129 return {"triangles_adjacency", 6};
130 default:
131 return {"points", 1};
132 }
133}
134
135/// Hashes one (or two) program streams 117/// Hashes one (or two) program streams
136u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code, 118u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code,
137 const ProgramCode& code_b) { 119 const ProgramCode& code_b = {}) {
138 u64 unique_identifier = boost::hash_value(code); 120 u64 unique_identifier = boost::hash_value(code);
139 if (is_a) { 121 if (is_a) {
140 // VertexA programs include two programs 122 // VertexA programs include two programs
@@ -143,24 +125,6 @@ u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& co
143 return unique_identifier; 125 return unique_identifier;
144} 126}
145 127
146/// Creates an unspecialized program from code streams
147std::string GenerateGLSL(const Device& device, ShaderType shader_type, const ShaderIR& ir,
148 const std::optional<ShaderIR>& ir_b) {
149 switch (shader_type) {
150 case ShaderType::Vertex:
151 return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr);
152 case ShaderType::Geometry:
153 return GLShader::GenerateGeometryShader(device, ir);
154 case ShaderType::Fragment:
155 return GLShader::GenerateFragmentShader(device, ir);
156 case ShaderType::Compute:
157 return GLShader::GenerateComputeShader(device, ir);
158 default:
159 UNIMPLEMENTED_MSG("Unimplemented shader_type={}", static_cast<u32>(shader_type));
160 return {};
161 }
162}
163
164constexpr const char* GetShaderTypeName(ShaderType shader_type) { 128constexpr const char* GetShaderTypeName(ShaderType shader_type) {
165 switch (shader_type) { 129 switch (shader_type) {
166 case ShaderType::Vertex: 130 case ShaderType::Vertex:
@@ -196,102 +160,38 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
196 return {}; 160 return {};
197} 161}
198 162
199std::string GetShaderId(u64 unique_identifier, ShaderType shader_type) { 163std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
200 return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); 164 return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
201} 165}
202 166
203Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface(Core::System& system, 167std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
204 ShaderType shader_type) { 168 const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
205 if (shader_type == ShaderType::Compute) { 169 const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
206 return system.GPU().KeplerCompute(); 170 entry.graphics_info, entry.compute_info};
207 } else { 171 const auto registry = std::make_shared<Registry>(entry.type, info);
208 return system.GPU().Maxwell3D(); 172 for (const auto& [address, value] : entry.keys) {
209 } 173 const auto [buffer, offset] = address;
210} 174 registry->InsertKey(buffer, offset, value);
211
212std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType shader_type) {
213 return std::make_unique<ConstBufferLocker>(shader_type,
214 GetConstBufferEngineInterface(system, shader_type));
215}
216
217void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
218 locker.SetBoundBuffer(usage.bound_buffer);
219 for (const auto& key : usage.keys) {
220 const auto [buffer, offset] = key.first;
221 locker.InsertKey(buffer, offset, key.second);
222 } 175 }
223 for (const auto& [offset, sampler] : usage.bound_samplers) { 176 for (const auto& [offset, sampler] : entry.bound_samplers) {
224 locker.InsertBoundSampler(offset, sampler); 177 registry->InsertBoundSampler(offset, sampler);
225 } 178 }
226 for (const auto& [key, sampler] : usage.bindless_samplers) { 179 for (const auto& [key, sampler] : entry.bindless_samplers) {
227 const auto [buffer, offset] = key; 180 const auto [buffer, offset] = key;
228 locker.InsertBindlessSampler(buffer, offset, sampler); 181 registry->InsertBindlessSampler(buffer, offset, sampler);
229 } 182 }
183 return registry;
230} 184}
231 185
232CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderType shader_type, 186std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type,
233 const ProgramCode& code, const ProgramCode& code_b, 187 u64 unique_identifier, const ShaderIR& ir,
234 ConstBufferLocker& locker, const ProgramVariant& variant, 188 const Registry& registry, bool hint_retrievable = false) {
235 bool hint_retrievable = false) { 189 const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
236 LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, shader_type)); 190 LOG_INFO(Render_OpenGL, "{}", shader_id);
237
238 const bool is_compute = shader_type == ShaderType::Compute;
239 const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
240 const ShaderIR ir(code, main_offset, COMPILER_SETTINGS, locker);
241 std::optional<ShaderIR> ir_b;
242 if (!code_b.empty()) {
243 ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker);
244 }
245
246 std::string source = fmt::format(R"(// {}
247#version 430 core
248#extension GL_ARB_separate_shader_objects : enable
249)",
250 GetShaderId(unique_identifier, shader_type));
251 if (device.HasShaderBallot()) {
252 source += "#extension GL_ARB_shader_ballot : require\n";
253 }
254 if (device.HasVertexViewportLayer()) {
255 source += "#extension GL_ARB_shader_viewport_layer_array : require\n";
256 }
257 if (device.HasImageLoadFormatted()) {
258 source += "#extension GL_EXT_shader_image_load_formatted : require\n";
259 }
260 if (device.HasWarpIntrinsics()) {
261 source += "#extension GL_NV_gpu_shader5 : require\n"
262 "#extension GL_NV_shader_thread_group : require\n"
263 "#extension GL_NV_shader_thread_shuffle : require\n";
264 }
265 // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 operations)
266 // on places where we don't want to.
267 // Thanks to Ryujinx for finding this workaround.
268 source += "#pragma optionNV(fastmath off)\n";
269
270 if (shader_type == ShaderType::Geometry) {
271 const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(variant.primitive_mode);
272 source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices);
273 source += fmt::format("layout ({}) in;\n", glsl_topology);
274 }
275 if (shader_type == ShaderType::Compute) {
276 if (variant.local_memory_size > 0) {
277 source += fmt::format("#define LOCAL_MEMORY_SIZE {}\n",
278 Common::AlignUp(variant.local_memory_size, 4) / 4);
279 }
280 source +=
281 fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n",
282 variant.block_x, variant.block_y, variant.block_z);
283
284 if (variant.shared_memory_size > 0) {
285 // shared_memory_size is described in number of words
286 source += fmt::format("shared uint smem[{}];\n", variant.shared_memory_size);
287 }
288 }
289
290 source += '\n';
291 source += GenerateGLSL(device, shader_type, ir, ir_b);
292 191
192 const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
293 OGLShader shader; 193 OGLShader shader;
294 shader.Create(source.c_str(), GetGLShaderType(shader_type)); 194 shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
295 195
296 auto program = std::make_shared<OGLProgram>(); 196 auto program = std::make_shared<OGLProgram>();
297 program->Create(true, hint_retrievable, shader.handle); 197 program->Create(true, hint_retrievable, shader.handle);
@@ -299,7 +199,7 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp
299} 199}
300 200
301std::unordered_set<GLenum> GetSupportedFormats() { 201std::unordered_set<GLenum> GetSupportedFormats() {
302 GLint num_formats{}; 202 GLint num_formats;
303 glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); 203 glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
304 204
305 std::vector<GLint> formats(num_formats); 205 std::vector<GLint> formats(num_formats);
@@ -314,115 +214,82 @@ std::unordered_set<GLenum> GetSupportedFormats() {
314 214
315} // Anonymous namespace 215} // Anonymous namespace
316 216
317CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type, 217CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
318 GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b) 218 std::shared_ptr<VideoCommon::Shader::Registry> registry,
319 : RasterizerCacheObject{params.host_ptr}, system{params.system}, 219 ShaderEntries entries, std::shared_ptr<OGLProgram> program)
320 disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, 220 : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)},
321 unique_identifier{params.unique_identifier}, shader_type{shader_type}, 221 cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {}
322 entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} { 222
323 if (!params.precompiled_variants) { 223CachedShader::~CachedShader() = default;
324 return; 224
325 } 225GLuint CachedShader::GetHandle() const {
326 for (const auto& pair : *params.precompiled_variants) { 226 DEBUG_ASSERT(registry->IsConsistent());
327 auto locker = MakeLocker(system, shader_type); 227 return program->handle;
328 const auto& usage = pair->first;
329 FillLocker(*locker, usage);
330
331 std::unique_ptr<LockerVariant>* locker_variant = nullptr;
332 const auto it =
333 std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) {
334 return variant->locker->HasEqualKeys(*locker);
335 });
336 if (it == locker_variants.end()) {
337 locker_variant = &locker_variants.emplace_back();
338 *locker_variant = std::make_unique<LockerVariant>();
339 locker_variant->get()->locker = std::move(locker);
340 } else {
341 locker_variant = &*it;
342 }
343 locker_variant->get()->programs.emplace(usage.variant, pair->second);
344 }
345} 228}
346 229
347Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, 230Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
348 Maxwell::ShaderProgram program_type, ProgramCode code, 231 Maxwell::ShaderProgram program_type, ProgramCode code,
349 ProgramCode code_b) { 232 ProgramCode code_b) {
350 const auto shader_type = GetShaderType(program_type); 233 const auto shader_type = GetShaderType(program_type);
351 params.disk_cache.SaveRaw( 234 const std::size_t size_in_bytes = code.size() * sizeof(u64);
352 ShaderDiskCacheRaw(params.unique_identifier, shader_type, code, code_b));
353 235
354 ConstBufferLocker locker(shader_type, params.system.GPU().Maxwell3D()); 236 auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D());
355 const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker); 237 const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
356 // TODO(Rodrigo): Handle VertexA shaders 238 // TODO(Rodrigo): Handle VertexA shaders
357 // std::optional<ShaderIR> ir_b; 239 // std::optional<ShaderIR> ir_b;
358 // if (!code_b.empty()) { 240 // if (!code_b.empty()) {
359 // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); 241 // ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
360 // } 242 // }
361 return std::shared_ptr<CachedShader>(new CachedShader( 243 auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
362 params, shader_type, GLShader::GetEntries(ir), std::move(code), std::move(code_b))); 244
245 ShaderDiskCacheEntry entry;
246 entry.type = shader_type;
247 entry.code = std::move(code);
248 entry.code_b = std::move(code_b);
249 entry.unique_identifier = params.unique_identifier;
250 entry.bound_buffer = registry->GetBoundBuffer();
251 entry.graphics_info = registry->GetGraphicsInfo();
252 entry.keys = registry->GetKeys();
253 entry.bound_samplers = registry->GetBoundSamplers();
254 entry.bindless_samplers = registry->GetBindlessSamplers();
255 params.disk_cache.SaveEntry(std::move(entry));
256
257 return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
258 size_in_bytes, std::move(registry),
259 MakeEntries(ir), std::move(program)));
363} 260}
364 261
365Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { 262Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
366 params.disk_cache.SaveRaw( 263 const std::size_t size_in_bytes = code.size() * sizeof(u64);
367 ShaderDiskCacheRaw(params.unique_identifier, ShaderType::Compute, code)); 264
368 265 auto& engine = params.system.GPU().KeplerCompute();
369 ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute, 266 auto registry = std::make_shared<Registry>(ShaderType::Compute, engine);
370 params.system.GPU().KeplerCompute()); 267 const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
371 const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker); 268 const u64 uid = params.unique_identifier;
372 return std::shared_ptr<CachedShader>(new CachedShader( 269 auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
373 params, ShaderType::Compute, GLShader::GetEntries(ir), std::move(code), {})); 270
271 ShaderDiskCacheEntry entry;
272 entry.type = ShaderType::Compute;
273 entry.code = std::move(code);
274 entry.unique_identifier = uid;
275 entry.bound_buffer = registry->GetBoundBuffer();
276 entry.compute_info = registry->GetComputeInfo();
277 entry.keys = registry->GetKeys();
278 entry.bound_samplers = registry->GetBoundSamplers();
279 entry.bindless_samplers = registry->GetBindlessSamplers();
280 params.disk_cache.SaveEntry(std::move(entry));
281
282 return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
283 size_in_bytes, std::move(registry),
284 MakeEntries(ir), std::move(program)));
374} 285}
375 286
376Shader CachedShader::CreateFromCache(const ShaderParameters& params, 287Shader CachedShader::CreateFromCache(const ShaderParameters& params,
377 const UnspecializedShader& unspecialized) { 288 const PrecompiledShader& precompiled_shader,
378 return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.type, 289 std::size_t size_in_bytes) {
379 unspecialized.entries, unspecialized.code, 290 return std::shared_ptr<CachedShader>(new CachedShader(
380 unspecialized.code_b)); 291 params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry,
381} 292 precompiled_shader.entries, precompiled_shader.program));
382
383GLuint CachedShader::GetHandle(const ProgramVariant& variant) {
384 EnsureValidLockerVariant();
385
386 const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant);
387 auto& program = entry->second;
388 if (!is_cache_miss) {
389 return program->handle;
390 }
391
392 program = BuildShader(device, unique_identifier, shader_type, code, code_b,
393 *curr_locker_variant->locker, variant);
394 disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker));
395
396 LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
397 return program->handle;
398}
399
400bool CachedShader::EnsureValidLockerVariant() {
401 const auto previous_variant = curr_locker_variant;
402 if (curr_locker_variant && !curr_locker_variant->locker->IsConsistent()) {
403 curr_locker_variant = nullptr;
404 }
405 if (!curr_locker_variant) {
406 for (auto& variant : locker_variants) {
407 if (variant->locker->IsConsistent()) {
408 curr_locker_variant = variant.get();
409 }
410 }
411 }
412 if (!curr_locker_variant) {
413 auto& new_variant = locker_variants.emplace_back();
414 new_variant = std::make_unique<LockerVariant>();
415 new_variant->locker = MakeLocker(system, shader_type);
416 curr_locker_variant = new_variant.get();
417 }
418 return previous_variant == curr_locker_variant;
419}
420
421ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant,
422 const ConstBufferLocker& locker) const {
423 return ShaderDiskCacheUsage{unique_identifier, variant,
424 locker.GetBoundBuffer(), locker.GetKeys(),
425 locker.GetBoundSamplers(), locker.GetBindlessSamplers()};
426} 293}
427 294
428ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, 295ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -432,16 +299,12 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System&
432 299
433void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, 300void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
434 const VideoCore::DiskResourceLoadCallback& callback) { 301 const VideoCore::DiskResourceLoadCallback& callback) {
435 const auto transferable = disk_cache.LoadTransferable(); 302 const std::optional transferable = disk_cache.LoadTransferable();
436 if (!transferable) { 303 if (!transferable) {
437 return; 304 return;
438 } 305 }
439 const auto [raws, shader_usages] = *transferable;
440 if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) {
441 return;
442 }
443 306
444 const auto dumps = disk_cache.LoadPrecompiled(); 307 const std::vector gl_cache = disk_cache.LoadPrecompiled();
445 const auto supported_formats = GetSupportedFormats(); 308 const auto supported_formats = GetSupportedFormats();
446 309
447 // Track if precompiled cache was altered during loading to know if we have to 310 // Track if precompiled cache was altered during loading to know if we have to
@@ -450,77 +313,82 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
450 313
451 // Inform the frontend about shader build initialization 314 // Inform the frontend about shader build initialization
452 if (callback) { 315 if (callback) {
453 callback(VideoCore::LoadCallbackStage::Build, 0, shader_usages.size()); 316 callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size());
454 } 317 }
455 318
456 std::mutex mutex; 319 std::mutex mutex;
457 std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex 320 std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex
458 std::atomic_bool compilation_failed = false; 321 std::atomic_bool gl_cache_failed = false;
459 322
460 const auto Worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, 323 const auto find_precompiled = [&gl_cache](u64 id) {
461 std::size_t end, const std::vector<ShaderDiskCacheUsage>& shader_usages, 324 return std::find_if(gl_cache.begin(), gl_cache.end(),
462 const ShaderDumpsMap& dumps) { 325 [id](const auto& entry) { return entry.unique_identifier == id; });
326 };
327
328 const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
329 std::size_t end) {
463 context->MakeCurrent(); 330 context->MakeCurrent();
464 SCOPE_EXIT({ return context->DoneCurrent(); }); 331 SCOPE_EXIT({ return context->DoneCurrent(); });
465 332
466 for (std::size_t i = begin; i < end; ++i) { 333 for (std::size_t i = begin; i < end; ++i) {
467 if (stop_loading || compilation_failed) { 334 if (stop_loading) {
468 return; 335 return;
469 } 336 }
470 const auto& usage{shader_usages[i]}; 337 const auto& entry = (*transferable)[i];
471 const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; 338 const u64 uid = entry.unique_identifier;
472 const auto dump{dumps.find(usage)}; 339 const auto it = find_precompiled(uid);
473 340 const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr;
474 CachedProgram shader; 341
475 if (dump != dumps.end()) { 342 const bool is_compute = entry.type == ShaderType::Compute;
476 // If the shader is dumped, attempt to load it with 343 const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
477 shader = GeneratePrecompiledProgram(dump->second, supported_formats); 344 auto registry = MakeRegistry(entry);
478 if (!shader) { 345 const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
479 compilation_failed = true; 346
480 return; 347 std::shared_ptr<OGLProgram> program;
348 if (precompiled_entry) {
349 // If the shader is precompiled, attempt to load it with
350 program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
351 if (!program) {
352 gl_cache_failed = true;
481 } 353 }
482 } 354 }
483 if (!shader) { 355 if (!program) {
484 auto locker{MakeLocker(system, unspecialized.type)}; 356 // Otherwise compile it from GLSL
485 FillLocker(*locker, usage); 357 program = BuildShader(device, entry.type, uid, ir, *registry, true);
486
487 shader = BuildShader(device, usage.unique_identifier, unspecialized.type,
488 unspecialized.code, unspecialized.code_b, *locker,
489 usage.variant, true);
490 } 358 }
491 359
360 PrecompiledShader shader;
361 shader.program = std::move(program);
362 shader.registry = std::move(registry);
363 shader.entries = MakeEntries(ir);
364
492 std::scoped_lock lock{mutex}; 365 std::scoped_lock lock{mutex};
493 if (callback) { 366 if (callback) {
494 callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, 367 callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
495 shader_usages.size()); 368 transferable->size());
496 } 369 }
497 370 runtime_cache.emplace(entry.unique_identifier, std::move(shader));
498 precompiled_programs.emplace(usage, std::move(shader));
499
500 // TODO(Rodrigo): Is there a better way to do this?
501 precompiled_variants[usage.unique_identifier].push_back(
502 precompiled_programs.find(usage));
503 } 371 }
504 }; 372 };
505 373
506 const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)}; 374 const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)};
507 const std::size_t bucket_size{shader_usages.size() / num_workers}; 375 const std::size_t bucket_size{transferable->size() / num_workers};
508 std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); 376 std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
509 std::vector<std::thread> threads(num_workers); 377 std::vector<std::thread> threads(num_workers);
510 for (std::size_t i = 0; i < num_workers; ++i) { 378 for (std::size_t i = 0; i < num_workers; ++i) {
511 const bool is_last_worker = i + 1 == num_workers; 379 const bool is_last_worker = i + 1 == num_workers;
512 const std::size_t start{bucket_size * i}; 380 const std::size_t start{bucket_size * i};
513 const std::size_t end{is_last_worker ? shader_usages.size() : start + bucket_size}; 381 const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size};
514 382
515 // On some platforms the shared context has to be created from the GUI thread 383 // On some platforms the shared context has to be created from the GUI thread
516 contexts[i] = emu_window.CreateSharedContext(); 384 contexts[i] = emu_window.CreateSharedContext();
517 threads[i] = std::thread(Worker, contexts[i].get(), start, end, shader_usages, dumps); 385 threads[i] = std::thread(worker, contexts[i].get(), start, end);
518 } 386 }
519 for (auto& thread : threads) { 387 for (auto& thread : threads) {
520 thread.join(); 388 thread.join();
521 } 389 }
522 390
523 if (compilation_failed) { 391 if (gl_cache_failed) {
524 // Invalidate the precompiled cache if a shader dumped shader was rejected 392 // Invalidate the precompiled cache if a shader dumped shader was rejected
525 disk_cache.InvalidatePrecompiled(); 393 disk_cache.InvalidatePrecompiled();
526 precompiled_cache_altered = true; 394 precompiled_cache_altered = true;
@@ -533,11 +401,12 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
533 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw 401 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
534 // before precompiling them 402 // before precompiling them
535 403
536 for (std::size_t i = 0; i < shader_usages.size(); ++i) { 404 for (std::size_t i = 0; i < transferable->size(); ++i) {
537 const auto& usage{shader_usages[i]}; 405 const u64 id = (*transferable)[i].unique_identifier;
538 if (dumps.find(usage) == dumps.end()) { 406 const auto it = find_precompiled(id);
539 const auto& program{precompiled_programs.at(usage)}; 407 if (it == gl_cache.end()) {
540 disk_cache.SaveDump(usage, program->handle); 408 const GLuint program = runtime_cache.at(id).program->handle;
409 disk_cache.SavePrecompiled(id, program);
541 precompiled_cache_altered = true; 410 precompiled_cache_altered = true;
542 } 411 }
543 } 412 }
@@ -547,80 +416,29 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
547 } 416 }
548} 417}
549 418
550const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const { 419std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram(
551 const auto it = precompiled_variants.find(unique_identifier); 420 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
552 return it == precompiled_variants.end() ? nullptr : &it->second; 421 const std::unordered_set<GLenum>& supported_formats) {
553} 422 if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) {
554 423 LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing");
555CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
556 const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) {
557 if (supported_formats.find(dump.binary_format) == supported_formats.end()) {
558 LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing");
559 return {}; 424 return {};
560 } 425 }
561 426
562 CachedProgram shader = std::make_shared<OGLProgram>(); 427 auto program = std::make_shared<OGLProgram>();
563 shader->handle = glCreateProgram(); 428 program->handle = glCreateProgram();
564 glProgramParameteri(shader->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); 429 glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
565 glProgramBinary(shader->handle, dump.binary_format, dump.binary.data(), 430 glProgramBinary(program->handle, precompiled_entry.binary_format,
566 static_cast<GLsizei>(dump.binary.size())); 431 precompiled_entry.binary.data(),
567 432 static_cast<GLsizei>(precompiled_entry.binary.size()));
568 GLint link_status{}; 433
569 glGetProgramiv(shader->handle, GL_LINK_STATUS, &link_status); 434 GLint link_status;
435 glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status);
570 if (link_status == GL_FALSE) { 436 if (link_status == GL_FALSE) {
571 LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing"); 437 LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
572 return {}; 438 return {};
573 } 439 }
574 440
575 return shader; 441 return program;
576}
577
578bool ShaderCacheOpenGL::GenerateUnspecializedShaders(
579 const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
580 const std::vector<ShaderDiskCacheRaw>& raws) {
581 if (callback) {
582 callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
583 }
584
585 for (std::size_t i = 0; i < raws.size(); ++i) {
586 if (stop_loading) {
587 return false;
588 }
589 const auto& raw{raws[i]};
590 const u64 unique_identifier{raw.GetUniqueIdentifier()};
591 const u64 calculated_hash{
592 GetUniqueIdentifier(raw.GetType(), raw.HasProgramA(), raw.GetCode(), raw.GetCodeB())};
593 if (unique_identifier != calculated_hash) {
594 LOG_ERROR(Render_OpenGL,
595 "Invalid hash in entry={:016x} (obtained hash={:016x}) - "
596 "removing shader cache",
597 raw.GetUniqueIdentifier(), calculated_hash);
598 disk_cache.InvalidateTransferable();
599 return false;
600 }
601
602 const u32 main_offset =
603 raw.GetType() == ShaderType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
604 ConstBufferLocker locker(raw.GetType());
605 const ShaderIR ir(raw.GetCode(), main_offset, COMPILER_SETTINGS, locker);
606 // TODO(Rodrigo): Handle VertexA shaders
607 // std::optional<ShaderIR> ir_b;
608 // if (raw.HasProgramA()) {
609 // ir_b.emplace(raw.GetProgramCodeB(), main_offset);
610 // }
611
612 UnspecializedShader unspecialized;
613 unspecialized.entries = GLShader::GetEntries(ir);
614 unspecialized.type = raw.GetType();
615 unspecialized.code = raw.GetCode();
616 unspecialized.code_b = raw.GetCodeB();
617 unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized);
618
619 if (callback) {
620 callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size());
621 }
622 }
623 return true;
624} 442}
625 443
626Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { 444Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
@@ -648,17 +466,17 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
648 466
649 const auto unique_identifier = GetUniqueIdentifier( 467 const auto unique_identifier = GetUniqueIdentifier(
650 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); 468 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
651 const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
652 const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; 469 const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
653 const ShaderParameters params{system, disk_cache, precompiled_variants, device, 470 const ShaderParameters params{system, disk_cache, device,
654 cpu_addr, host_ptr, unique_identifier}; 471 cpu_addr, host_ptr, unique_identifier};
655 472
656 const auto found = unspecialized_shaders.find(unique_identifier); 473 const auto found = runtime_cache.find(unique_identifier);
657 if (found == unspecialized_shaders.end()) { 474 if (found == runtime_cache.end()) {
658 shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), 475 shader = CachedShader::CreateStageFromMemory(params, program, std::move(code),
659 std::move(code_b)); 476 std::move(code_b));
660 } else { 477 } else {
661 shader = CachedShader::CreateFromCache(params, found->second); 478 const std::size_t size_in_bytes = code.size() * sizeof(u64);
479 shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
662 } 480 }
663 Register(shader); 481 Register(shader);
664 482
@@ -673,19 +491,19 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
673 return kernel; 491 return kernel;
674 } 492 }
675 493
676 // No kernel found - create a new one 494 // No kernel found, create a new one
677 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; 495 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
678 const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code, {})}; 496 const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
679 const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
680 const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; 497 const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
681 const ShaderParameters params{system, disk_cache, precompiled_variants, device, 498 const ShaderParameters params{system, disk_cache, device,
682 cpu_addr, host_ptr, unique_identifier}; 499 cpu_addr, host_ptr, unique_identifier};
683 500
684 const auto found = unspecialized_shaders.find(unique_identifier); 501 const auto found = runtime_cache.find(unique_identifier);
685 if (found == unspecialized_shaders.end()) { 502 if (found == runtime_cache.end()) {
686 kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); 503 kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
687 } else { 504 } else {
688 kernel = CachedShader::CreateFromCache(params, found->second); 505 const std::size_t size_in_bytes = code.size() * sizeof(u64);
506 kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
689 } 507 }
690 508
691 Register(kernel); 509 Register(kernel);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 7b1470db3..4935019fc 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -22,7 +22,7 @@
22#include "video_core/renderer_opengl/gl_resource_manager.h" 22#include "video_core/renderer_opengl/gl_resource_manager.h"
23#include "video_core/renderer_opengl/gl_shader_decompiler.h" 23#include "video_core/renderer_opengl/gl_shader_decompiler.h"
24#include "video_core/renderer_opengl/gl_shader_disk_cache.h" 24#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
25#include "video_core/shader/const_buffer_locker.h" 25#include "video_core/shader/registry.h"
26#include "video_core/shader/shader_ir.h" 26#include "video_core/shader/shader_ir.h"
27 27
28namespace Core { 28namespace Core {
@@ -41,22 +41,17 @@ class RasterizerOpenGL;
41struct UnspecializedShader; 41struct UnspecializedShader;
42 42
43using Shader = std::shared_ptr<CachedShader>; 43using Shader = std::shared_ptr<CachedShader>;
44using CachedProgram = std::shared_ptr<OGLProgram>;
45using Maxwell = Tegra::Engines::Maxwell3D::Regs; 44using Maxwell = Tegra::Engines::Maxwell3D::Regs;
46using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; 45
47using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>; 46struct PrecompiledShader {
48 47 std::shared_ptr<OGLProgram> program;
49struct UnspecializedShader { 48 std::shared_ptr<VideoCommon::Shader::Registry> registry;
50 GLShader::ShaderEntries entries; 49 ShaderEntries entries;
51 Tegra::Engines::ShaderType type;
52 ProgramCode code;
53 ProgramCode code_b;
54}; 50};
55 51
56struct ShaderParameters { 52struct ShaderParameters {
57 Core::System& system; 53 Core::System& system;
58 ShaderDiskCacheOpenGL& disk_cache; 54 ShaderDiskCacheOpenGL& disk_cache;
59 const PrecompiledVariants* precompiled_variants;
60 const Device& device; 55 const Device& device;
61 VAddr cpu_addr; 56 VAddr cpu_addr;
62 u8* host_ptr; 57 u8* host_ptr;
@@ -65,61 +60,45 @@ struct ShaderParameters {
65 60
66class CachedShader final : public RasterizerCacheObject { 61class CachedShader final : public RasterizerCacheObject {
67public: 62public:
68 static Shader CreateStageFromMemory(const ShaderParameters& params, 63 ~CachedShader();
69 Maxwell::ShaderProgram program_type,
70 ProgramCode program_code, ProgramCode program_code_b);
71 static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
72 64
73 static Shader CreateFromCache(const ShaderParameters& params, 65 /// Gets the GL program handle for the shader
74 const UnspecializedShader& unspecialized); 66 GLuint GetHandle() const;
75 67
68 /// Returns the guest CPU address of the shader
76 VAddr GetCpuAddr() const override { 69 VAddr GetCpuAddr() const override {
77 return cpu_addr; 70 return cpu_addr;
78 } 71 }
79 72
73 /// Returns the size in bytes of the shader
80 std::size_t GetSizeInBytes() const override { 74 std::size_t GetSizeInBytes() const override {
81 return code.size() * sizeof(u64); 75 return size_in_bytes;
82 } 76 }
83 77
84 /// Gets the shader entries for the shader 78 /// Gets the shader entries for the shader
85 const GLShader::ShaderEntries& GetShaderEntries() const { 79 const ShaderEntries& GetEntries() const {
86 return entries; 80 return entries;
87 } 81 }
88 82
89 /// Gets the GL program handle for the shader 83 static Shader CreateStageFromMemory(const ShaderParameters& params,
90 GLuint GetHandle(const ProgramVariant& variant); 84 Maxwell::ShaderProgram program_type,
91 85 ProgramCode program_code, ProgramCode program_code_b);
92private: 86 static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
93 struct LockerVariant {
94 std::unique_ptr<VideoCommon::Shader::ConstBufferLocker> locker;
95 std::unordered_map<ProgramVariant, CachedProgram> programs;
96 };
97
98 explicit CachedShader(const ShaderParameters& params, Tegra::Engines::ShaderType shader_type,
99 GLShader::ShaderEntries entries, ProgramCode program_code,
100 ProgramCode program_code_b);
101
102 bool EnsureValidLockerVariant();
103
104 ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant,
105 const VideoCommon::Shader::ConstBufferLocker& locker) const;
106
107 Core::System& system;
108 ShaderDiskCacheOpenGL& disk_cache;
109 const Device& device;
110
111 VAddr cpu_addr{};
112
113 u64 unique_identifier{};
114 Tegra::Engines::ShaderType shader_type{};
115
116 GLShader::ShaderEntries entries;
117 87
118 ProgramCode code; 88 static Shader CreateFromCache(const ShaderParameters& params,
119 ProgramCode code_b; 89 const PrecompiledShader& precompiled_shader,
90 std::size_t size_in_bytes);
120 91
121 LockerVariant* curr_locker_variant = nullptr; 92private:
122 std::vector<std::unique_ptr<LockerVariant>> locker_variants; 93 explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
94 std::shared_ptr<VideoCommon::Shader::Registry> registry,
95 ShaderEntries entries, std::shared_ptr<OGLProgram> program);
96
97 std::shared_ptr<VideoCommon::Shader::Registry> registry;
98 ShaderEntries entries;
99 VAddr cpu_addr = 0;
100 std::size_t size_in_bytes = 0;
101 std::shared_ptr<OGLProgram> program;
123}; 102};
124 103
125class ShaderCacheOpenGL final : public RasterizerCache<Shader> { 104class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
@@ -142,25 +121,15 @@ protected:
142 void FlushObjectInner(const Shader& object) override {} 121 void FlushObjectInner(const Shader& object) override {}
143 122
144private: 123private:
145 bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading, 124 std::shared_ptr<OGLProgram> GeneratePrecompiledProgram(
146 const VideoCore::DiskResourceLoadCallback& callback, 125 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
147 const std::vector<ShaderDiskCacheRaw>& raws); 126 const std::unordered_set<GLenum>& supported_formats);
148
149 CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
150 const std::unordered_set<GLenum>& supported_formats);
151
152 const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const;
153 127
154 Core::System& system; 128 Core::System& system;
155 Core::Frontend::EmuWindow& emu_window; 129 Core::Frontend::EmuWindow& emu_window;
156 const Device& device; 130 const Device& device;
157
158 ShaderDiskCacheOpenGL disk_cache; 131 ShaderDiskCacheOpenGL disk_cache;
159 132 std::unordered_map<u64, PrecompiledShader> runtime_cache;
160 PrecompiledPrograms precompiled_programs;
161 std::unordered_map<u64, PrecompiledVariants> precompiled_variants;
162
163 std::unordered_map<u64, UnspecializedShader> unspecialized_shaders;
164 133
165 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; 134 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
166}; 135};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 3a41ed30c..3adf7f0cb 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -23,8 +23,9 @@
23#include "video_core/shader/ast.h" 23#include "video_core/shader/ast.h"
24#include "video_core/shader/node.h" 24#include "video_core/shader/node.h"
25#include "video_core/shader/shader_ir.h" 25#include "video_core/shader/shader_ir.h"
26#include "video_core/shader/transform_feedback.h"
26 27
27namespace OpenGL::GLShader { 28namespace OpenGL {
28 29
29namespace { 30namespace {
30 31
@@ -36,6 +37,8 @@ using Tegra::Shader::IpaInterpMode;
36using Tegra::Shader::IpaMode; 37using Tegra::Shader::IpaMode;
37using Tegra::Shader::IpaSampleMode; 38using Tegra::Shader::IpaSampleMode;
38using Tegra::Shader::Register; 39using Tegra::Shader::Register;
40using VideoCommon::Shader::BuildTransformFeedback;
41using VideoCommon::Shader::Registry;
39 42
40using namespace std::string_literals; 43using namespace std::string_literals;
41using namespace VideoCommon::Shader; 44using namespace VideoCommon::Shader;
@@ -48,6 +51,11 @@ class ExprDecompiler;
48 51
49enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; 52enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
50 53
54constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"};
55
56constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr";
57constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr";
58
51struct TextureOffset {}; 59struct TextureOffset {};
52struct TextureDerivates {}; 60struct TextureDerivates {};
53using TextureArgument = std::pair<Type, Node>; 61using TextureArgument = std::pair<Type, Node>;
@@ -56,6 +64,25 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>
56constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 64constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
57 static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); 65 static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
58 66
67constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
68#define ftou floatBitsToUint
69#define itof intBitsToFloat
70#define utof uintBitsToFloat
71
72bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
73 bvec2 is_nan1 = isnan(pair1);
74 bvec2 is_nan2 = isnan(pair2);
75 return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
76}}
77
78const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
79const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
80
81layout (std140, binding = {}) uniform vs_config {{
82 float y_direction;
83}};
84)";
85
59class ShaderWriter final { 86class ShaderWriter final {
60public: 87public:
61 void AddExpression(std::string_view text) { 88 void AddExpression(std::string_view text) {
@@ -269,12 +296,41 @@ const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
269 } 296 }
270} 297}
271 298
299/// Describes primitive behavior on geometry shaders
300std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) {
301 switch (topology) {
302 case Maxwell::PrimitiveTopology::Points:
303 return {"points", 1};
304 case Maxwell::PrimitiveTopology::Lines:
305 case Maxwell::PrimitiveTopology::LineStrip:
306 return {"lines", 2};
307 case Maxwell::PrimitiveTopology::LinesAdjacency:
308 case Maxwell::PrimitiveTopology::LineStripAdjacency:
309 return {"lines_adjacency", 4};
310 case Maxwell::PrimitiveTopology::Triangles:
311 case Maxwell::PrimitiveTopology::TriangleStrip:
312 case Maxwell::PrimitiveTopology::TriangleFan:
313 return {"triangles", 3};
314 case Maxwell::PrimitiveTopology::TrianglesAdjacency:
315 case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
316 return {"triangles_adjacency", 6};
317 default:
318 UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
319 return {"points", 1};
320 }
321}
322
272/// Generates code to use for a swizzle operation. 323/// Generates code to use for a swizzle operation.
273constexpr const char* GetSwizzle(u32 element) { 324constexpr const char* GetSwizzle(std::size_t element) {
274 constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; 325 constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
275 return swizzle.at(element); 326 return swizzle.at(element);
276} 327}
277 328
329constexpr const char* GetColorSwizzle(std::size_t element) {
330 constexpr std::array swizzle = {".r", ".g", ".b", ".a"};
331 return swizzle.at(element);
332}
333
278/// Translate topology 334/// Translate topology
279std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { 335std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
280 switch (topology) { 336 switch (topology) {
@@ -341,11 +397,66 @@ std::string FlowStackTopName(MetaStackClass stack) {
341 return stage == ShaderType::Vertex; 397 return stage == ShaderType::Vertex;
342} 398}
343 399
400struct GenericVaryingDescription {
401 std::string name;
402 u8 first_element = 0;
403 bool is_scalar = false;
404};
405
344class GLSLDecompiler final { 406class GLSLDecompiler final {
345public: 407public:
346 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage, 408 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
347 std::string suffix) 409 ShaderType stage, std::string_view identifier, std::string_view suffix)
348 : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} 410 : device{device}, ir{ir}, registry{registry}, stage{stage},
411 identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} {
412 if (stage != ShaderType::Compute) {
413 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
414 }
415 }
416
417 void Decompile() {
418 DeclareHeader();
419 DeclareVertex();
420 DeclareGeometry();
421 DeclareFragment();
422 DeclareCompute();
423 DeclareInputAttributes();
424 DeclareOutputAttributes();
425 DeclareImages();
426 DeclareSamplers();
427 DeclareGlobalMemory();
428 DeclareConstantBuffers();
429 DeclareLocalMemory();
430 DeclareRegisters();
431 DeclarePredicates();
432 DeclareInternalFlags();
433 DeclareCustomVariables();
434 DeclarePhysicalAttributeReader();
435
436 code.AddLine("void main() {{");
437 ++code.scope;
438
439 if (stage == ShaderType::Vertex) {
440 code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);");
441 }
442
443 if (ir.IsDecompiled()) {
444 DecompileAST();
445 } else {
446 DecompileBranchMode();
447 }
448
449 --code.scope;
450 code.AddLine("}}");
451 }
452
453 std::string GetResult() {
454 return code.GetResult();
455 }
456
457private:
458 friend class ASTDecompiler;
459 friend class ExprDecompiler;
349 460
350 void DecompileBranchMode() { 461 void DecompileBranchMode() {
351 // VM's program counter 462 // VM's program counter
@@ -387,43 +498,36 @@ public:
387 498
388 void DecompileAST(); 499 void DecompileAST();
389 500
390 void Decompile() { 501 void DeclareHeader() {
391 DeclareVertex(); 502 if (!identifier.empty()) {
392 DeclareGeometry(); 503 code.AddLine("// {}", identifier);
393 DeclareRegisters(); 504 }
394 DeclareCustomVariables(); 505 code.AddLine("#version 440 core");
395 DeclarePredicates(); 506 code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
396 DeclareLocalMemory(); 507 if (device.HasShaderBallot()) {
397 DeclareInternalFlags(); 508 code.AddLine("#extension GL_ARB_shader_ballot : require");
398 DeclareInputAttributes();
399 DeclareOutputAttributes();
400 DeclareConstantBuffers();
401 DeclareGlobalMemory();
402 DeclareSamplers();
403 DeclareImages();
404 DeclarePhysicalAttributeReader();
405
406 code.AddLine("void execute_{}() {{", suffix);
407 ++code.scope;
408
409 if (ir.IsDecompiled()) {
410 DecompileAST();
411 } else {
412 DecompileBranchMode();
413 } 509 }
510 if (device.HasVertexViewportLayer()) {
511 code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require");
512 }
513 if (device.HasImageLoadFormatted()) {
514 code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
515 }
516 if (device.HasWarpIntrinsics()) {
517 code.AddLine("#extension GL_NV_gpu_shader5 : require");
518 code.AddLine("#extension GL_NV_shader_thread_group : require");
519 code.AddLine("#extension GL_NV_shader_thread_shuffle : require");
520 }
521 // This pragma stops Nvidia's driver from over optimizing math (probably using fp16
522 // operations) on places where we don't want to.
523 // Thanks to Ryujinx for finding this workaround.
524 code.AddLine("#pragma optionNV(fastmath off)");
414 525
415 --code.scope; 526 code.AddNewLine();
416 code.AddLine("}}");
417 }
418 527
419 std::string GetResult() { 528 code.AddLine(CommonDeclarations, EmulationUniformBlockBinding);
420 return code.GetResult();
421 } 529 }
422 530
423private:
424 friend class ASTDecompiler;
425 friend class ExprDecompiler;
426
427 void DeclareVertex() { 531 void DeclareVertex() {
428 if (!IsVertexShader(stage)) 532 if (!IsVertexShader(stage))
429 return; 533 return;
@@ -436,9 +540,15 @@ private:
436 return; 540 return;
437 } 541 }
438 542
543 const auto& info = registry.GetGraphicsInfo();
544 const auto input_topology = info.primitive_topology;
545 const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology);
546 max_input_vertices = max_vertices;
547 code.AddLine("layout ({}) in;", glsl_topology);
548
439 const auto topology = GetTopologyName(header.common3.output_topology); 549 const auto topology = GetTopologyName(header.common3.output_topology);
440 const auto max_vertices = header.common4.max_output_vertices.Value(); 550 const auto max_output_vertices = header.common4.max_output_vertices.Value();
441 code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices); 551 code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices);
442 code.AddNewLine(); 552 code.AddNewLine();
443 553
444 code.AddLine("in gl_PerVertex {{"); 554 code.AddLine("in gl_PerVertex {{");
@@ -450,11 +560,40 @@ private:
450 DeclareVertexRedeclarations(); 560 DeclareVertexRedeclarations();
451 } 561 }
452 562
563 void DeclareFragment() {
564 if (stage != ShaderType::Fragment) {
565 return;
566 }
567 for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
568 code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt);
569 }
570 }
571
572 void DeclareCompute() {
573 if (stage != ShaderType::Compute) {
574 return;
575 }
576 const auto& info = registry.GetComputeInfo();
577 if (const u32 size = info.shared_memory_size_in_words; size > 0) {
578 code.AddLine("shared uint smem[{}];", size);
579 code.AddNewLine();
580 }
581 code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
582 info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]);
583 code.AddNewLine();
584 }
585
453 void DeclareVertexRedeclarations() { 586 void DeclareVertexRedeclarations() {
454 code.AddLine("out gl_PerVertex {{"); 587 code.AddLine("out gl_PerVertex {{");
455 ++code.scope; 588 ++code.scope;
456 589
457 code.AddLine("vec4 gl_Position;"); 590 auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position);
591 if (!pos_xfb.empty()) {
592 pos_xfb = fmt::format("layout ({}) ", pos_xfb);
593 }
594 const char* pos_type =
595 FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1);
596 code.AddLine("{}{} gl_Position;", pos_xfb, pos_type);
458 597
459 for (const auto attribute : ir.GetOutputAttributes()) { 598 for (const auto attribute : ir.GetOutputAttributes()) {
460 if (attribute == Attribute::Index::ClipDistances0123 || 599 if (attribute == Attribute::Index::ClipDistances0123 ||
@@ -525,18 +664,16 @@ private:
525 } 664 }
526 665
527 void DeclareLocalMemory() { 666 void DeclareLocalMemory() {
667 u64 local_memory_size = 0;
528 if (stage == ShaderType::Compute) { 668 if (stage == ShaderType::Compute) {
529 code.AddLine("#ifdef LOCAL_MEMORY_SIZE"); 669 local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
530 code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory()); 670 } else {
531 code.AddLine("#endif"); 671 local_memory_size = header.GetLocalMemorySize();
532 return;
533 } 672 }
534
535 const u64 local_memory_size = header.GetLocalMemorySize();
536 if (local_memory_size == 0) { 673 if (local_memory_size == 0) {
537 return; 674 return;
538 } 675 }
539 const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; 676 const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4;
540 code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); 677 code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
541 code.AddNewLine(); 678 code.AddNewLine();
542 } 679 }
@@ -589,7 +726,7 @@ private:
589 void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { 726 void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
590 const u32 location{GetGenericAttributeIndex(index)}; 727 const u32 location{GetGenericAttributeIndex(index)};
591 728
592 std::string name{GetInputAttribute(index)}; 729 std::string name{GetGenericInputAttribute(index)};
593 if (stage == ShaderType::Geometry) { 730 if (stage == ShaderType::Geometry) {
594 name = "gs_" + name + "[]"; 731 name = "gs_" + name + "[]";
595 } 732 }
@@ -626,9 +763,59 @@ private:
626 } 763 }
627 } 764 }
628 765
766 std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const {
767 const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
768 const auto it = transform_feedback.find(location);
769 if (it == transform_feedback.end()) {
770 return {};
771 }
772 return it->second.components;
773 }
774
775 std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const {
776 const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
777 const auto it = transform_feedback.find(location);
778 if (it == transform_feedback.end()) {
779 return {};
780 }
781
782 const VaryingTFB& tfb = it->second;
783 return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer,
784 tfb.offset, tfb.stride);
785 }
786
629 void DeclareOutputAttribute(Attribute::Index index) { 787 void DeclareOutputAttribute(Attribute::Index index) {
630 const u32 location{GetGenericAttributeIndex(index)}; 788 static constexpr std::string_view swizzle = "xyzw";
631 code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index)); 789 u8 element = 0;
790 while (element < 4) {
791 auto xfb = GetTransformFeedbackDecoration(index, element);
792 if (!xfb.empty()) {
793 xfb = fmt::format(", {}", xfb);
794 }
795 const std::size_t remainder = 4 - element;
796 const std::size_t num_components = GetNumComponents(index, element).value_or(remainder);
797 const char* const type = FLOAT_TYPES.at(num_components - 1);
798
799 const u32 location = GetGenericAttributeIndex(index);
800
801 GenericVaryingDescription description;
802 description.first_element = static_cast<u8>(element);
803 description.is_scalar = num_components == 1;
804 description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME);
805 if (element != 0 || num_components != 4) {
806 const std::string_view name_swizzle = swizzle.substr(element, num_components);
807 description.name = fmt::format("{}_{}", description.name, name_swizzle);
808 }
809 for (std::size_t i = 0; i < num_components; ++i) {
810 const u8 offset = static_cast<u8>(location * 4 + element + i);
811 varying_description.insert({offset, description});
812 }
813
814 code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element,
815 xfb, type, description.name);
816
817 element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
818 }
632 } 819 }
633 820
634 void DeclareConstantBuffers() { 821 void DeclareConstantBuffers() {
@@ -925,7 +1112,8 @@ private:
925 // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games 1112 // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
926 // set an 0x80000000 index for those and the shader fails to build. Find out why 1113 // set an 0x80000000 index for those and the shader fails to build. Find out why
927 // this happens and what's its intent. 1114 // this happens and what's its intent.
928 return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint()); 1115 return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(),
1116 max_input_vertices.value());
929 } 1117 }
930 return std::string(name); 1118 return std::string(name);
931 }; 1119 };
@@ -980,7 +1168,7 @@ private:
980 return {"0", Type::Int}; 1168 return {"0", Type::Int};
981 default: 1169 default:
982 if (IsGenericAttribute(attribute)) { 1170 if (IsGenericAttribute(attribute)) {
983 return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element), 1171 return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element),
984 Type::Float}; 1172 Type::Float};
985 } 1173 }
986 break; 1174 break;
@@ -1049,8 +1237,7 @@ private:
1049 return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}}; 1237 return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}};
1050 default: 1238 default:
1051 if (IsGenericAttribute(attribute)) { 1239 if (IsGenericAttribute(attribute)) {
1052 return { 1240 return {{GetGenericOutputAttribute(attribute, abuf->GetElement()), Type::Float}};
1053 {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), Type::Float}};
1054 } 1241 }
1055 UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); 1242 UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
1056 return {}; 1243 return {};
@@ -1945,7 +2132,7 @@ private:
1945 // TODO(Subv): Figure out how dual-source blending is configured in the Switch. 2132 // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
1946 for (u32 component = 0; component < 4; ++component) { 2133 for (u32 component = 0; component < 4; ++component) {
1947 if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { 2134 if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
1948 code.AddLine("FragColor{}[{}] = {};", render_target, component, 2135 code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component),
1949 SafeGetRegister(current_reg).AsFloat()); 2136 SafeGetRegister(current_reg).AsFloat());
1950 ++current_reg; 2137 ++current_reg;
1951 } 2138 }
@@ -2261,27 +2448,34 @@ private:
2261 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 2448 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
2262 2449
2263 std::string GetRegister(u32 index) const { 2450 std::string GetRegister(u32 index) const {
2264 return GetDeclarationWithSuffix(index, "gpr"); 2451 return AppendSuffix(index, "gpr");
2265 } 2452 }
2266 2453
2267 std::string GetCustomVariable(u32 index) const { 2454 std::string GetCustomVariable(u32 index) const {
2268 return GetDeclarationWithSuffix(index, "custom_var"); 2455 return AppendSuffix(index, "custom_var");
2269 } 2456 }
2270 2457
2271 std::string GetPredicate(Tegra::Shader::Pred pred) const { 2458 std::string GetPredicate(Tegra::Shader::Pred pred) const {
2272 return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred"); 2459 return AppendSuffix(static_cast<u32>(pred), "pred");
2273 } 2460 }
2274 2461
2275 std::string GetInputAttribute(Attribute::Index attribute) const { 2462 std::string GetGenericInputAttribute(Attribute::Index attribute) const {
2276 return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr"); 2463 return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME);
2277 } 2464 }
2278 2465
2279 std::string GetOutputAttribute(Attribute::Index attribute) const { 2466 std::unordered_map<u8, GenericVaryingDescription> varying_description;
2280 return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr"); 2467
2468 std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const {
2469 const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element);
2470 const auto& description = varying_description.at(offset);
2471 if (description.is_scalar) {
2472 return description.name;
2473 }
2474 return fmt::format("{}[{}]", description.name, element - description.first_element);
2281 } 2475 }
2282 2476
2283 std::string GetConstBuffer(u32 index) const { 2477 std::string GetConstBuffer(u32 index) const {
2284 return GetDeclarationWithSuffix(index, "cbuf"); 2478 return AppendSuffix(index, "cbuf");
2285 } 2479 }
2286 2480
2287 std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { 2481 std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
@@ -2294,11 +2488,15 @@ private:
2294 } 2488 }
2295 2489
2296 std::string GetConstBufferBlock(u32 index) const { 2490 std::string GetConstBufferBlock(u32 index) const {
2297 return GetDeclarationWithSuffix(index, "cbuf_block"); 2491 return AppendSuffix(index, "cbuf_block");
2298 } 2492 }
2299 2493
2300 std::string GetLocalMemory() const { 2494 std::string GetLocalMemory() const {
2301 return "lmem_" + suffix; 2495 if (suffix.empty()) {
2496 return "lmem";
2497 } else {
2498 return "lmem_" + std::string{suffix};
2499 }
2302 } 2500 }
2303 2501
2304 std::string GetInternalFlag(InternalFlag flag) const { 2502 std::string GetInternalFlag(InternalFlag flag) const {
@@ -2307,19 +2505,27 @@ private:
2307 const auto index = static_cast<u32>(flag); 2505 const auto index = static_cast<u32>(flag);
2308 ASSERT(index < static_cast<u32>(InternalFlag::Amount)); 2506 ASSERT(index < static_cast<u32>(InternalFlag::Amount));
2309 2507
2310 return fmt::format("{}_{}", InternalFlagNames[index], suffix); 2508 if (suffix.empty()) {
2509 return InternalFlagNames[index];
2510 } else {
2511 return fmt::format("{}_{}", InternalFlagNames[index], suffix);
2512 }
2311 } 2513 }
2312 2514
2313 std::string GetSampler(const Sampler& sampler) const { 2515 std::string GetSampler(const Sampler& sampler) const {
2314 return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); 2516 return AppendSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
2315 } 2517 }
2316 2518
2317 std::string GetImage(const Image& image) const { 2519 std::string GetImage(const Image& image) const {
2318 return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image"); 2520 return AppendSuffix(static_cast<u32>(image.GetIndex()), "image");
2319 } 2521 }
2320 2522
2321 std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const { 2523 std::string AppendSuffix(u32 index, std::string_view name) const {
2322 return fmt::format("{}_{}_{}", name, index, suffix); 2524 if (suffix.empty()) {
2525 return fmt::format("{}{}", name, index);
2526 } else {
2527 return fmt::format("{}{}_{}", name, index, suffix);
2528 }
2323 } 2529 }
2324 2530
2325 u32 GetNumPhysicalInputAttributes() const { 2531 u32 GetNumPhysicalInputAttributes() const {
@@ -2334,17 +2540,31 @@ private:
2334 return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); 2540 return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
2335 } 2541 }
2336 2542
2543 bool IsRenderTargetEnabled(u32 render_target) const {
2544 for (u32 component = 0; component < 4; ++component) {
2545 if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
2546 return true;
2547 }
2548 }
2549 return false;
2550 }
2551
2337 const Device& device; 2552 const Device& device;
2338 const ShaderIR& ir; 2553 const ShaderIR& ir;
2554 const Registry& registry;
2339 const ShaderType stage; 2555 const ShaderType stage;
2340 const std::string suffix; 2556 const std::string_view identifier;
2557 const std::string_view suffix;
2341 const Header header; 2558 const Header header;
2559 std::unordered_map<u8, VaryingTFB> transform_feedback;
2342 2560
2343 ShaderWriter code; 2561 ShaderWriter code;
2562
2563 std::optional<u32> max_input_vertices;
2344}; 2564};
2345 2565
2346std::string GetFlowVariable(u32 i) { 2566std::string GetFlowVariable(u32 index) {
2347 return fmt::format("flow_var_{}", i); 2567 return fmt::format("flow_var{}", index);
2348} 2568}
2349 2569
2350class ExprDecompiler { 2570class ExprDecompiler {
@@ -2531,7 +2751,7 @@ void GLSLDecompiler::DecompileAST() {
2531 2751
2532} // Anonymous namespace 2752} // Anonymous namespace
2533 2753
2534ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) { 2754ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
2535 ShaderEntries entries; 2755 ShaderEntries entries;
2536 for (const auto& cbuf : ir.GetConstantBuffers()) { 2756 for (const auto& cbuf : ir.GetConstantBuffers()) {
2537 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), 2757 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
@@ -2555,28 +2775,12 @@ ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) {
2555 return entries; 2775 return entries;
2556} 2776}
2557 2777
2558std::string GetCommonDeclarations() { 2778std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry,
2559 return R"(#define ftoi floatBitsToInt 2779 ShaderType stage, std::string_view identifier,
2560#define ftou floatBitsToUint 2780 std::string_view suffix) {
2561#define itof intBitsToFloat 2781 GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix);
2562#define utof uintBitsToFloat
2563
2564bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {
2565 bvec2 is_nan1 = isnan(pair1);
2566 bvec2 is_nan2 = isnan(pair2);
2567 return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
2568}
2569
2570const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
2571const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
2572)";
2573}
2574
2575std::string Decompile(const Device& device, const ShaderIR& ir, ShaderType stage,
2576 const std::string& suffix) {
2577 GLSLDecompiler decompiler(device, ir, stage, suffix);
2578 decompiler.Decompile(); 2782 decompiler.Decompile();
2579 return decompiler.GetResult(); 2783 return decompiler.GetResult();
2580} 2784}
2581 2785
2582} // namespace OpenGL::GLShader 2786} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 0f692c1db..e7dbd810c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -6,22 +6,18 @@
6 6
7#include <array> 7#include <array>
8#include <string> 8#include <string>
9#include <string_view>
9#include <utility> 10#include <utility>
10#include <vector> 11#include <vector>
11#include "common/common_types.h" 12#include "common/common_types.h"
12#include "video_core/engines/maxwell_3d.h" 13#include "video_core/engines/maxwell_3d.h"
13#include "video_core/engines/shader_type.h" 14#include "video_core/engines/shader_type.h"
15#include "video_core/shader/registry.h"
14#include "video_core/shader/shader_ir.h" 16#include "video_core/shader/shader_ir.h"
15 17
16namespace VideoCommon::Shader {
17class ShaderIR;
18}
19
20namespace OpenGL { 18namespace OpenGL {
21class Device;
22}
23 19
24namespace OpenGL::GLShader { 20class Device;
25 21
26using Maxwell = Tegra::Engines::Maxwell3D::Regs; 22using Maxwell = Tegra::Engines::Maxwell3D::Regs;
27using SamplerEntry = VideoCommon::Shader::Sampler; 23using SamplerEntry = VideoCommon::Shader::Sampler;
@@ -78,11 +74,11 @@ struct ShaderEntries {
78 std::size_t shader_length{}; 74 std::size_t shader_length{};
79}; 75};
80 76
81ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir); 77ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir);
82
83std::string GetCommonDeclarations();
84 78
85std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, 79std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
86 Tegra::Engines::ShaderType stage, const std::string& suffix); 80 const VideoCommon::Shader::Registry& registry,
81 Tegra::Engines::ShaderType stage, std::string_view identifier,
82 std::string_view suffix = {});
87 83
88} // namespace OpenGL::GLShader 84} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 1fc204f6f..9e95a122b 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -31,32 +31,24 @@ namespace {
31 31
32using ShaderCacheVersionHash = std::array<u8, 64>; 32using ShaderCacheVersionHash = std::array<u8, 64>;
33 33
34enum class TransferableEntryKind : u32 {
35 Raw,
36 Usage,
37};
38
39struct ConstBufferKey { 34struct ConstBufferKey {
40 u32 cbuf{}; 35 u32 cbuf = 0;
41 u32 offset{}; 36 u32 offset = 0;
42 u32 value{}; 37 u32 value = 0;
43}; 38};
44 39
45struct BoundSamplerKey { 40struct BoundSamplerKey {
46 u32 offset{}; 41 u32 offset = 0;
47 Tegra::Engines::SamplerDescriptor sampler{}; 42 Tegra::Engines::SamplerDescriptor sampler;
48}; 43};
49 44
50struct BindlessSamplerKey { 45struct BindlessSamplerKey {
51 u32 cbuf{}; 46 u32 cbuf = 0;
52 u32 offset{}; 47 u32 offset = 0;
53 Tegra::Engines::SamplerDescriptor sampler{}; 48 Tegra::Engines::SamplerDescriptor sampler;
54}; 49};
55 50
56constexpr u32 NativeVersion = 12; 51constexpr u32 NativeVersion = 20;
57
58// Making sure sizes doesn't change by accident
59static_assert(sizeof(ProgramVariant) == 20);
60 52
61ShaderCacheVersionHash GetShaderCacheVersionHash() { 53ShaderCacheVersionHash GetShaderCacheVersionHash() {
62 ShaderCacheVersionHash hash{}; 54 ShaderCacheVersionHash hash{};
@@ -67,61 +59,124 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
67 59
68} // Anonymous namespace 60} // Anonymous namespace
69 61
70ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ShaderType type, ProgramCode code, 62ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
71 ProgramCode code_b)
72 : unique_identifier{unique_identifier}, type{type}, code{std::move(code)}, code_b{std::move(
73 code_b)} {}
74 63
75ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; 64ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
76 65
77ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default; 66bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
78 67 if (file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) {
79bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
80 if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) ||
81 file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) {
82 return false; 68 return false;
83 } 69 }
84 u32 code_size{}; 70 u32 code_size;
85 u32 code_size_b{}; 71 u32 code_size_b;
86 if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) || 72 if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) ||
87 file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) { 73 file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) {
88 return false; 74 return false;
89 } 75 }
90
91 code.resize(code_size); 76 code.resize(code_size);
92 code_b.resize(code_size_b); 77 code_b.resize(code_size_b);
93 78
94 if (file.ReadArray(code.data(), code_size) != code_size) 79 if (file.ReadArray(code.data(), code_size) != code_size) {
95 return false; 80 return false;
96 81 }
97 if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) { 82 if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) {
98 return false; 83 return false;
99 } 84 }
85
86 u8 is_texture_handler_size_known;
87 u32 texture_handler_size_value;
88 u32 num_keys;
89 u32 num_bound_samplers;
90 u32 num_bindless_samplers;
91 if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
92 file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
93 file.ReadArray(&texture_handler_size_value, 1) != 1 ||
94 file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
95 file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
96 file.ReadArray(&num_bindless_samplers, 1) != 1) {
97 return false;
98 }
99 if (is_texture_handler_size_known) {
100 texture_handler_size = texture_handler_size_value;
101 }
102
103 std::vector<ConstBufferKey> flat_keys(num_keys);
104 std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers);
105 std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers);
106 if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
107 file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
108 flat_bound_samplers.size() ||
109 file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
110 flat_bindless_samplers.size()) {
111 return false;
112 }
113 for (const auto& key : flat_keys) {
114 keys.insert({{key.cbuf, key.offset}, key.value});
115 }
116 for (const auto& key : flat_bound_samplers) {
117 bound_samplers.emplace(key.offset, key.sampler);
118 }
119 for (const auto& key : flat_bindless_samplers) {
120 bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
121 }
122
100 return true; 123 return true;
101} 124}
102 125
103bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { 126bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
104 if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(static_cast<u32>(type)) != 1 || 127 if (file.WriteObject(static_cast<u32>(type)) != 1 ||
105 file.WriteObject(static_cast<u32>(code.size())) != 1 || 128 file.WriteObject(static_cast<u32>(code.size())) != 1 ||
106 file.WriteObject(static_cast<u32>(code_b.size())) != 1) { 129 file.WriteObject(static_cast<u32>(code_b.size())) != 1) {
107 return false; 130 return false;
108 } 131 }
109 132 if (file.WriteArray(code.data(), code.size()) != code.size()) {
110 if (file.WriteArray(code.data(), code.size()) != code.size())
111 return false; 133 return false;
112 134 }
113 if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) { 135 if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) {
114 return false; 136 return false;
115 } 137 }
116 return true; 138
139 if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 ||
140 file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) != 1 ||
141 file.WriteObject(texture_handler_size.value_or(0)) != 1 ||
142 file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
143 file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
144 file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
145 file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
146 return false;
147 }
148
149 std::vector<ConstBufferKey> flat_keys;
150 flat_keys.reserve(keys.size());
151 for (const auto& [address, value] : keys) {
152 flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
153 }
154
155 std::vector<BoundSamplerKey> flat_bound_samplers;
156 flat_bound_samplers.reserve(bound_samplers.size());
157 for (const auto& [address, sampler] : bound_samplers) {
158 flat_bound_samplers.push_back(BoundSamplerKey{address, sampler});
159 }
160
161 std::vector<BindlessSamplerKey> flat_bindless_samplers;
162 flat_bindless_samplers.reserve(bindless_samplers.size());
163 for (const auto& [address, sampler] : bindless_samplers) {
164 flat_bindless_samplers.push_back(
165 BindlessSamplerKey{address.first, address.second, sampler});
166 }
167
168 return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
169 file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
170 flat_bound_samplers.size() &&
171 file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
172 flat_bindless_samplers.size();
117} 173}
118 174
119ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {} 175ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
120 176
121ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; 177ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
122 178
123std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> 179std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
124ShaderDiskCacheOpenGL::LoadTransferable() {
125 // Skip games without title id 180 // Skip games without title id
126 const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0; 181 const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0;
127 if (!Settings::values.use_disk_shader_cache || !has_title_id) { 182 if (!Settings::values.use_disk_shader_cache || !has_title_id) {
@@ -130,17 +185,14 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
130 185
131 FileUtil::IOFile file(GetTransferablePath(), "rb"); 186 FileUtil::IOFile file(GetTransferablePath(), "rb");
132 if (!file.IsOpen()) { 187 if (!file.IsOpen()) {
133 LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}", 188 LOG_INFO(Render_OpenGL, "No transferable shader cache found");
134 GetTitleID());
135 is_usable = true; 189 is_usable = true;
136 return {}; 190 return {};
137 } 191 }
138 192
139 u32 version{}; 193 u32 version{};
140 if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { 194 if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
141 LOG_ERROR(Render_OpenGL, 195 LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
142 "Failed to get transferable cache version for title id={}, skipping",
143 GetTitleID());
144 return {}; 196 return {};
145 } 197 }
146 198
@@ -158,105 +210,42 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
158 } 210 }
159 211
160 // Version is valid, load the shaders 212 // Version is valid, load the shaders
161 constexpr const char error_loading[] = "Failed to load transferable raw entry, skipping"; 213 std::vector<ShaderDiskCacheEntry> entries;
162 std::vector<ShaderDiskCacheRaw> raws;
163 std::vector<ShaderDiskCacheUsage> usages;
164 while (file.Tell() < file.GetSize()) { 214 while (file.Tell() < file.GetSize()) {
165 TransferableEntryKind kind{}; 215 ShaderDiskCacheEntry& entry = entries.emplace_back();
166 if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) { 216 if (!entry.Load(file)) {
167 LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping"); 217 LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
168 return {};
169 }
170
171 switch (kind) {
172 case TransferableEntryKind::Raw: {
173 ShaderDiskCacheRaw entry;
174 if (!entry.Load(file)) {
175 LOG_ERROR(Render_OpenGL, error_loading);
176 return {};
177 }
178 transferable.insert({entry.GetUniqueIdentifier(), {}});
179 raws.push_back(std::move(entry));
180 break;
181 }
182 case TransferableEntryKind::Usage: {
183 ShaderDiskCacheUsage usage;
184
185 u32 num_keys{};
186 u32 num_bound_samplers{};
187 u32 num_bindless_samplers{};
188 if (file.ReadArray(&usage.unique_identifier, 1) != 1 ||
189 file.ReadArray(&usage.variant, 1) != 1 ||
190 file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
191 file.ReadArray(&num_bound_samplers, 1) != 1 ||
192 file.ReadArray(&num_bindless_samplers, 1) != 1) {
193 LOG_ERROR(Render_OpenGL, error_loading);
194 return {};
195 }
196
197 std::vector<ConstBufferKey> keys(num_keys);
198 std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
199 std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
200 if (file.ReadArray(keys.data(), keys.size()) != keys.size() ||
201 file.ReadArray(bound_samplers.data(), bound_samplers.size()) !=
202 bound_samplers.size() ||
203 file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) !=
204 bindless_samplers.size()) {
205 LOG_ERROR(Render_OpenGL, error_loading);
206 return {};
207 }
208 for (const auto& key : keys) {
209 usage.keys.insert({{key.cbuf, key.offset}, key.value});
210 }
211 for (const auto& key : bound_samplers) {
212 usage.bound_samplers.emplace(key.offset, key.sampler);
213 }
214 for (const auto& key : bindless_samplers) {
215 usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
216 }
217
218 usages.push_back(std::move(usage));
219 break;
220 }
221 default:
222 LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping",
223 static_cast<u32>(kind));
224 return {}; 218 return {};
225 } 219 }
226 } 220 }
227 221
228 is_usable = true; 222 is_usable = true;
229 return {{std::move(raws), std::move(usages)}}; 223 return {std::move(entries)};
230} 224}
231 225
232std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> 226std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() {
233ShaderDiskCacheOpenGL::LoadPrecompiled() {
234 if (!is_usable) { 227 if (!is_usable) {
235 return {}; 228 return {};
236 } 229 }
237 230
238 std::string path = GetPrecompiledPath(); 231 FileUtil::IOFile file(GetPrecompiledPath(), "rb");
239 FileUtil::IOFile file(path, "rb");
240 if (!file.IsOpen()) { 232 if (!file.IsOpen()) {
241 LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}", 233 LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
242 GetTitleID());
243 return {}; 234 return {};
244 } 235 }
245 236
246 const auto result = LoadPrecompiledFile(file); 237 if (const auto result = LoadPrecompiledFile(file)) {
247 if (!result) { 238 return *result;
248 LOG_INFO(Render_OpenGL,
249 "Failed to load precompiled cache for game with title id={}, removing",
250 GetTitleID());
251 file.Close();
252 InvalidatePrecompiled();
253 return {};
254 } 239 }
255 return *result; 240
241 LOG_INFO(Render_OpenGL, "Failed to load precompiled cache");
242 file.Close();
243 InvalidatePrecompiled();
244 return {};
256} 245}
257 246
258std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> 247std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
259ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { 248 FileUtil::IOFile& file) {
260 // Read compressed file from disk and decompress to virtual precompiled cache file 249 // Read compressed file from disk and decompress to virtual precompiled cache file
261 std::vector<u8> compressed(file.GetSize()); 250 std::vector<u8> compressed(file.GetSize());
262 file.ReadBytes(compressed.data(), compressed.size()); 251 file.ReadBytes(compressed.data(), compressed.size());
@@ -275,58 +264,22 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
275 return {}; 264 return {};
276 } 265 }
277 266
278 ShaderDumpsMap dumps; 267 std::vector<ShaderDiskCachePrecompiled> entries;
279 while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { 268 while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
280 u32 num_keys{}; 269 u32 binary_size;
281 u32 num_bound_samplers{}; 270 auto& entry = entries.emplace_back();
282 u32 num_bindless_samplers{}; 271 if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
283 ShaderDiskCacheUsage usage; 272 !LoadObjectFromPrecompiled(entry.binary_format) ||
284 if (!LoadObjectFromPrecompiled(usage.unique_identifier) || 273 !LoadObjectFromPrecompiled(binary_size)) {
285 !LoadObjectFromPrecompiled(usage.variant) ||
286 !LoadObjectFromPrecompiled(usage.bound_buffer) ||
287 !LoadObjectFromPrecompiled(num_keys) ||
288 !LoadObjectFromPrecompiled(num_bound_samplers) ||
289 !LoadObjectFromPrecompiled(num_bindless_samplers)) {
290 return {};
291 }
292 std::vector<ConstBufferKey> keys(num_keys);
293 std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
294 std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
295 if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) ||
296 !LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) !=
297 bound_samplers.size() ||
298 !LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size()) !=
299 bindless_samplers.size()) {
300 return {};
301 }
302 for (const auto& key : keys) {
303 usage.keys.insert({{key.cbuf, key.offset}, key.value});
304 }
305 for (const auto& key : bound_samplers) {
306 usage.bound_samplers.emplace(key.offset, key.sampler);
307 }
308 for (const auto& key : bindless_samplers) {
309 usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
310 }
311
312 ShaderDiskCacheDump dump;
313 if (!LoadObjectFromPrecompiled(dump.binary_format)) {
314 return {};
315 }
316
317 u32 binary_length{};
318 if (!LoadObjectFromPrecompiled(binary_length)) {
319 return {}; 274 return {};
320 } 275 }
321 276
322 dump.binary.resize(binary_length); 277 entry.binary.resize(binary_size);
323 if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { 278 if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
324 return {}; 279 return {};
325 } 280 }
326
327 dumps.emplace(std::move(usage), dump);
328 } 281 }
329 return dumps; 282 return entries;
330} 283}
331 284
332void ShaderDiskCacheOpenGL::InvalidateTransferable() { 285void ShaderDiskCacheOpenGL::InvalidateTransferable() {
@@ -346,13 +299,13 @@ void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
346 } 299 }
347} 300}
348 301
349void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) { 302void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
350 if (!is_usable) { 303 if (!is_usable) {
351 return; 304 return;
352 } 305 }
353 306
354 const u64 id = entry.GetUniqueIdentifier(); 307 const u64 id = entry.unique_identifier;
355 if (transferable.find(id) != transferable.end()) { 308 if (stored_transferable.find(id) != stored_transferable.end()) {
356 // The shader already exists 309 // The shader already exists
357 return; 310 return;
358 } 311 }
@@ -361,71 +314,17 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
361 if (!file.IsOpen()) { 314 if (!file.IsOpen()) {
362 return; 315 return;
363 } 316 }
364 if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) { 317 if (!entry.Save(file)) {
365 LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); 318 LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
366 file.Close(); 319 file.Close();
367 InvalidateTransferable(); 320 InvalidateTransferable();
368 return; 321 return;
369 } 322 }
370 transferable.insert({id, {}});
371}
372 323
373void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { 324 stored_transferable.insert(id);
374 if (!is_usable) {
375 return;
376 }
377
378 const auto it = transferable.find(usage.unique_identifier);
379 ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");
380
381 auto& usages{it->second};
382 if (usages.find(usage) != usages.end()) {
383 // Skip this variant since the shader is already stored.
384 return;
385 }
386 usages.insert(usage);
387
388 FileUtil::IOFile file = AppendTransferableFile();
389 if (!file.IsOpen())
390 return;
391 const auto Close = [&] {
392 LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing");
393 file.Close();
394 InvalidateTransferable();
395 };
396
397 if (file.WriteObject(TransferableEntryKind::Usage) != 1 ||
398 file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 ||
399 file.WriteObject(usage.bound_buffer) != 1 ||
400 file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 ||
401 file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 ||
402 file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) {
403 Close();
404 return;
405 }
406 for (const auto& [pair, value] : usage.keys) {
407 const auto [cbuf, offset] = pair;
408 if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) {
409 Close();
410 return;
411 }
412 }
413 for (const auto& [offset, sampler] : usage.bound_samplers) {
414 if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) {
415 Close();
416 return;
417 }
418 }
419 for (const auto& [pair, sampler] : usage.bindless_samplers) {
420 const auto [cbuf, offset] = pair;
421 if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) {
422 Close();
423 return;
424 }
425 }
426} 325}
427 326
428void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) { 327void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) {
429 if (!is_usable) { 328 if (!is_usable) {
430 return; 329 return;
431 } 330 }
@@ -437,51 +336,19 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
437 SavePrecompiledHeaderToVirtualPrecompiledCache(); 336 SavePrecompiledHeaderToVirtualPrecompiledCache();
438 } 337 }
439 338
440 GLint binary_length{}; 339 GLint binary_length;
441 glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); 340 glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
442 341
443 GLenum binary_format{}; 342 GLenum binary_format;
444 std::vector<u8> binary(binary_length); 343 std::vector<u8> binary(binary_length);
445 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); 344 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
446 345
447 const auto Close = [&] { 346 if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) ||
347 !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) ||
348 !SaveArrayToPrecompiled(binary.data(), binary.size())) {
448 LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", 349 LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
449 usage.unique_identifier); 350 unique_identifier);
450 InvalidatePrecompiled(); 351 InvalidatePrecompiled();
451 };
452
453 if (!SaveObjectToPrecompiled(usage.unique_identifier) ||
454 !SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) ||
455 !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) ||
456 !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) ||
457 !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) {
458 Close();
459 return;
460 }
461 for (const auto& [pair, value] : usage.keys) {
462 const auto [cbuf, offset] = pair;
463 if (SaveObjectToPrecompiled(ConstBufferKey{cbuf, offset, value}) != 1) {
464 Close();
465 return;
466 }
467 }
468 for (const auto& [offset, sampler] : usage.bound_samplers) {
469 if (SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler}) != 1) {
470 Close();
471 return;
472 }
473 }
474 for (const auto& [pair, sampler] : usage.bindless_samplers) {
475 const auto [cbuf, offset] = pair;
476 if (SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler}) != 1) {
477 Close();
478 return;
479 }
480 }
481 if (!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
482 !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
483 !SaveArrayToPrecompiled(binary.data(), binary.size())) {
484 Close();
485 } 352 }
486} 353}
487 354
@@ -534,7 +401,6 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
534 if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) { 401 if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) {
535 LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", 402 LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
536 precompiled_path); 403 precompiled_path);
537 return;
538 } 404 }
539} 405}
540 406
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index ef2371f6d..d5be52e40 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -19,8 +19,7 @@
19#include "common/common_types.h" 19#include "common/common_types.h"
20#include "core/file_sys/vfs_vector.h" 20#include "core/file_sys/vfs_vector.h"
21#include "video_core/engines/shader_type.h" 21#include "video_core/engines/shader_type.h"
22#include "video_core/renderer_opengl/gl_shader_gen.h" 22#include "video_core/shader/registry.h"
23#include "video_core/shader/const_buffer_locker.h"
24 23
25namespace Core { 24namespace Core {
26class System; 25class System;
@@ -32,139 +31,39 @@ class IOFile;
32 31
33namespace OpenGL { 32namespace OpenGL {
34 33
35struct ShaderDiskCacheUsage;
36struct ShaderDiskCacheDump;
37
38using ProgramCode = std::vector<u64>; 34using ProgramCode = std::vector<u64>;
39using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
40
41/// Describes the different variants a program can be compiled with.
42struct ProgramVariant final {
43 ProgramVariant() = default;
44
45 /// Graphics constructor.
46 explicit constexpr ProgramVariant(GLenum primitive_mode) noexcept
47 : primitive_mode{primitive_mode} {}
48
49 /// Compute constructor.
50 explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size,
51 u32 local_memory_size) noexcept
52 : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)},
53 shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {}
54
55 // Graphics specific parameters.
56 GLenum primitive_mode{};
57
58 // Compute specific parameters.
59 u32 block_x{};
60 u16 block_y{};
61 u16 block_z{};
62 u32 shared_memory_size{};
63 u32 local_memory_size{};
64
65 bool operator==(const ProgramVariant& rhs) const noexcept {
66 return std::tie(primitive_mode, block_x, block_y, block_z, shared_memory_size,
67 local_memory_size) == std::tie(rhs.primitive_mode, rhs.block_x, rhs.block_y,
68 rhs.block_z, rhs.shared_memory_size,
69 rhs.local_memory_size);
70 }
71
72 bool operator!=(const ProgramVariant& rhs) const noexcept {
73 return !operator==(rhs);
74 }
75};
76static_assert(std::is_trivially_copyable_v<ProgramVariant>);
77
78/// Describes how a shader is used.
79struct ShaderDiskCacheUsage {
80 u64 unique_identifier{};
81 ProgramVariant variant;
82 u32 bound_buffer{};
83 VideoCommon::Shader::KeyMap keys;
84 VideoCommon::Shader::BoundSamplerMap bound_samplers;
85 VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
86
87 bool operator==(const ShaderDiskCacheUsage& rhs) const {
88 return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) ==
89 std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers,
90 rhs.bindless_samplers);
91 }
92
93 bool operator!=(const ShaderDiskCacheUsage& rhs) const {
94 return !operator==(rhs);
95 }
96};
97
98} // namespace OpenGL
99
100namespace std {
101
102template <>
103struct hash<OpenGL::ProgramVariant> {
104 std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept {
105 return (static_cast<std::size_t>(variant.primitive_mode) << 6) ^
106 static_cast<std::size_t>(variant.block_x) ^
107 (static_cast<std::size_t>(variant.block_y) << 32) ^
108 (static_cast<std::size_t>(variant.block_z) << 48) ^
109 (static_cast<std::size_t>(variant.shared_memory_size) << 16) ^
110 (static_cast<std::size_t>(variant.local_memory_size) << 36);
111 }
112};
113
114template <>
115struct hash<OpenGL::ShaderDiskCacheUsage> {
116 std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {
117 return static_cast<std::size_t>(usage.unique_identifier) ^
118 std::hash<OpenGL::ProgramVariant>{}(usage.variant);
119 }
120};
121
122} // namespace std
123
124namespace OpenGL {
125 35
126/// Describes a shader how it's used by the guest GPU 36/// Describes a shader and how it's used by the guest GPU
127class ShaderDiskCacheRaw { 37struct ShaderDiskCacheEntry {
128public: 38 ShaderDiskCacheEntry();
129 explicit ShaderDiskCacheRaw(u64 unique_identifier, Tegra::Engines::ShaderType type, 39 ~ShaderDiskCacheEntry();
130 ProgramCode code, ProgramCode code_b = {});
131 ShaderDiskCacheRaw();
132 ~ShaderDiskCacheRaw();
133 40
134 bool Load(FileUtil::IOFile& file); 41 bool Load(FileUtil::IOFile& file);
135 42
136 bool Save(FileUtil::IOFile& file) const; 43 bool Save(FileUtil::IOFile& file) const;
137 44
138 u64 GetUniqueIdentifier() const {
139 return unique_identifier;
140 }
141
142 bool HasProgramA() const { 45 bool HasProgramA() const {
143 return !code.empty() && !code_b.empty(); 46 return !code.empty() && !code_b.empty();
144 } 47 }
145 48
146 Tegra::Engines::ShaderType GetType() const {
147 return type;
148 }
149
150 const ProgramCode& GetCode() const {
151 return code;
152 }
153
154 const ProgramCode& GetCodeB() const {
155 return code_b;
156 }
157
158private:
159 u64 unique_identifier{};
160 Tegra::Engines::ShaderType type{}; 49 Tegra::Engines::ShaderType type{};
161 ProgramCode code; 50 ProgramCode code;
162 ProgramCode code_b; 51 ProgramCode code_b;
52
53 u64 unique_identifier = 0;
54 std::optional<u32> texture_handler_size;
55 u32 bound_buffer = 0;
56 VideoCommon::Shader::GraphicsInfo graphics_info;
57 VideoCommon::Shader::ComputeInfo compute_info;
58 VideoCommon::Shader::KeyMap keys;
59 VideoCommon::Shader::BoundSamplerMap bound_samplers;
60 VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
163}; 61};
164 62
165/// Contains an OpenGL dumped binary program 63/// Contains an OpenGL dumped binary program
166struct ShaderDiskCacheDump { 64struct ShaderDiskCachePrecompiled {
167 GLenum binary_format{}; 65 u64 unique_identifier = 0;
66 GLenum binary_format = 0;
168 std::vector<u8> binary; 67 std::vector<u8> binary;
169}; 68};
170 69
@@ -174,11 +73,10 @@ public:
174 ~ShaderDiskCacheOpenGL(); 73 ~ShaderDiskCacheOpenGL();
175 74
176 /// Loads transferable cache. If file has a old version or on failure, it deletes the file. 75 /// Loads transferable cache. If file has a old version or on failure, it deletes the file.
177 std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> 76 std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
178 LoadTransferable();
179 77
180 /// Loads current game's precompiled cache. Invalidates on failure. 78 /// Loads current game's precompiled cache. Invalidates on failure.
181 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled(); 79 std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled();
182 80
183 /// Removes the transferable (and precompiled) cache file. 81 /// Removes the transferable (and precompiled) cache file.
184 void InvalidateTransferable(); 82 void InvalidateTransferable();
@@ -187,21 +85,18 @@ public:
187 void InvalidatePrecompiled(); 85 void InvalidatePrecompiled();
188 86
189 /// Saves a raw dump to the transferable file. Checks for collisions. 87 /// Saves a raw dump to the transferable file. Checks for collisions.
190 void SaveRaw(const ShaderDiskCacheRaw& entry); 88 void SaveEntry(const ShaderDiskCacheEntry& entry);
191
192 /// Saves shader usage to the transferable file. Does not check for collisions.
193 void SaveUsage(const ShaderDiskCacheUsage& usage);
194 89
195 /// Saves a dump entry to the precompiled file. Does not check for collisions. 90 /// Saves a dump entry to the precompiled file. Does not check for collisions.
196 void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); 91 void SavePrecompiled(u64 unique_identifier, GLuint program);
197 92
198 /// Serializes virtual precompiled shader cache file to real file 93 /// Serializes virtual precompiled shader cache file to real file
199 void SaveVirtualPrecompiledFile(); 94 void SaveVirtualPrecompiledFile();
200 95
201private: 96private:
202 /// Loads the transferable cache. Returns empty on failure. 97 /// Loads the transferable cache. Returns empty on failure.
203 std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> 98 std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
204 LoadPrecompiledFile(FileUtil::IOFile& file); 99 FileUtil::IOFile& file);
205 100
206 /// Opens current game's transferable file and write it's header if it doesn't exist 101 /// Opens current game's transferable file and write it's header if it doesn't exist
207 FileUtil::IOFile AppendTransferableFile() const; 102 FileUtil::IOFile AppendTransferableFile() const;
@@ -270,7 +165,7 @@ private:
270 std::size_t precompiled_cache_virtual_file_offset = 0; 165 std::size_t precompiled_cache_virtual_file_offset = 0;
271 166
272 // Stored transferable shaders 167 // Stored transferable shaders
273 std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable; 168 std::unordered_set<u64> stored_transferable;
274 169
275 // The cache has been loaded at boot 170 // The cache has been loaded at boot
276 bool is_usable{}; 171 bool is_usable{};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
deleted file mode 100644
index 34946fb47..000000000
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ /dev/null
@@ -1,109 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string>
6
7#include <fmt/format.h>
8
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/engines/shader_type.h"
11#include "video_core/renderer_opengl/gl_device.h"
12#include "video_core/renderer_opengl/gl_shader_decompiler.h"
13#include "video_core/renderer_opengl/gl_shader_gen.h"
14#include "video_core/shader/shader_ir.h"
15
16namespace OpenGL::GLShader {
17
18using Tegra::Engines::Maxwell3D;
19using Tegra::Engines::ShaderType;
20using VideoCommon::Shader::CompileDepth;
21using VideoCommon::Shader::CompilerSettings;
22using VideoCommon::Shader::ProgramCode;
23using VideoCommon::Shader::ShaderIR;
24
25std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) {
26 std::string out = GetCommonDeclarations();
27 out += fmt::format(R"(
28layout (std140, binding = {}) uniform vs_config {{
29 float y_direction;
30}};
31
32)",
33 EmulationUniformBlockBinding);
34 out += Decompile(device, ir, ShaderType::Vertex, "vertex");
35 if (ir_b) {
36 out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b");
37 }
38
39 out += R"(
40void main() {
41 gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);
42 execute_vertex();
43)";
44 if (ir_b) {
45 out += " execute_vertex_b();";
46 }
47 out += "}\n";
48 return out;
49}
50
51std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) {
52 std::string out = GetCommonDeclarations();
53 out += fmt::format(R"(
54layout (std140, binding = {}) uniform gs_config {{
55 float y_direction;
56}};
57
58)",
59 EmulationUniformBlockBinding);
60 out += Decompile(device, ir, ShaderType::Geometry, "geometry");
61
62 out += R"(
63void main() {
64 execute_geometry();
65}
66)";
67 return out;
68}
69
70std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) {
71 std::string out = GetCommonDeclarations();
72 out += fmt::format(R"(
73layout (location = 0) out vec4 FragColor0;
74layout (location = 1) out vec4 FragColor1;
75layout (location = 2) out vec4 FragColor2;
76layout (location = 3) out vec4 FragColor3;
77layout (location = 4) out vec4 FragColor4;
78layout (location = 5) out vec4 FragColor5;
79layout (location = 6) out vec4 FragColor6;
80layout (location = 7) out vec4 FragColor7;
81
82layout (std140, binding = {}) uniform fs_config {{
83 float y_direction;
84}};
85
86)",
87 EmulationUniformBlockBinding);
88 out += Decompile(device, ir, ShaderType::Fragment, "fragment");
89
90 out += R"(
91void main() {
92 execute_fragment();
93}
94)";
95 return out;
96}
97
98std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) {
99 std::string out = GetCommonDeclarations();
100 out += Decompile(device, ir, ShaderType::Compute, "compute");
101 out += R"(
102void main() {
103 execute_compute();
104}
105)";
106 return out;
107}
108
109} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
deleted file mode 100644
index cba2be9f9..000000000
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ /dev/null
@@ -1,34 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8
9#include "common/common_types.h"
10#include "video_core/renderer_opengl/gl_shader_decompiler.h"
11#include "video_core/shader/shader_ir.h"
12
13namespace OpenGL {
14class Device;
15}
16
17namespace OpenGL::GLShader {
18
19using VideoCommon::Shader::ProgramCode;
20using VideoCommon::Shader::ShaderIR;
21
22/// Generates the GLSL vertex shader program source code for the given VS program
23std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b);
24
25/// Generates the GLSL geometry shader program source code for the given GS program
26std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir);
27
28/// Generates the GLSL fragment shader program source code for the given FS program
29std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir);
30
31/// Generates the GLSL compute shader program source code for the given CS program
32std::string GenerateComputeShader(const Device& device, const ShaderIR& ir);
33
34} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 886bde3b9..3847bd722 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -107,8 +107,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
107 features.occlusionQueryPrecise = true; 107 features.occlusionQueryPrecise = true;
108 features.fragmentStoresAndAtomics = true; 108 features.fragmentStoresAndAtomics = true;
109 features.shaderImageGatherExtended = true; 109 features.shaderImageGatherExtended = true;
110 features.shaderStorageImageReadWithoutFormat = 110 features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported;
111 is_shader_storage_img_read_without_format_supported;
112 features.shaderStorageImageWriteWithoutFormat = true; 111 features.shaderStorageImageWriteWithoutFormat = true;
113 features.textureCompressionASTC_LDR = is_optimal_astc_supported; 112 features.textureCompressionASTC_LDR = is_optimal_astc_supported;
114 113
@@ -148,6 +147,15 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
148 LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); 147 LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
149 } 148 }
150 149
150 vk::PhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback;
151 if (ext_transform_feedback) {
152 transform_feedback.transformFeedback = true;
153 transform_feedback.geometryStreams = true;
154 SetNext(next, transform_feedback);
155 } else {
156 LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks");
157 }
158
151 if (!ext_depth_range_unrestricted) { 159 if (!ext_depth_range_unrestricted) {
152 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); 160 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
153 } 161 }
@@ -385,7 +393,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
385 } 393 }
386 }; 394 };
387 395
388 extensions.reserve(14); 396 extensions.reserve(15);
389 extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); 397 extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
390 extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); 398 extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
391 extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); 399 extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
@@ -397,18 +405,22 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
397 405
398 [[maybe_unused]] const bool nsight = 406 [[maybe_unused]] const bool nsight =
399 std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); 407 std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
400 bool khr_shader_float16_int8{}; 408 bool has_khr_shader_float16_int8{};
401 bool ext_subgroup_size_control{}; 409 bool has_ext_subgroup_size_control{};
410 bool has_ext_transform_feedback{};
402 for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { 411 for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
403 Test(extension, khr_uniform_buffer_standard_layout, 412 Test(extension, khr_uniform_buffer_standard_layout,
404 VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); 413 VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
405 Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); 414 Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
415 false);
406 Test(extension, ext_depth_range_unrestricted, 416 Test(extension, ext_depth_range_unrestricted,
407 VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); 417 VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
408 Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); 418 Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
409 Test(extension, ext_shader_viewport_index_layer, 419 Test(extension, ext_shader_viewport_index_layer,
410 VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); 420 VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
411 Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, 421 Test(extension, has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
422 false);
423 Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME,
412 false); 424 false);
413 if (Settings::values.renderer_debug) { 425 if (Settings::values.renderer_debug) {
414 Test(extension, nv_device_diagnostic_checkpoints, 426 Test(extension, nv_device_diagnostic_checkpoints,
@@ -416,13 +428,13 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
416 } 428 }
417 } 429 }
418 430
419 if (khr_shader_float16_int8) { 431 if (has_khr_shader_float16_int8) {
420 is_float16_supported = 432 is_float16_supported =
421 GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; 433 GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16;
422 extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); 434 extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
423 } 435 }
424 436
425 if (ext_subgroup_size_control) { 437 if (has_ext_subgroup_size_control) {
426 const auto features = 438 const auto features =
427 GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi); 439 GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi);
428 const auto properties = 440 const auto properties =
@@ -439,6 +451,20 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
439 is_warp_potentially_bigger = true; 451 is_warp_potentially_bigger = true;
440 } 452 }
441 453
454 if (has_ext_transform_feedback) {
455 const auto features =
456 GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi);
457 const auto properties =
458 GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi);
459
460 if (features.transformFeedback && features.geometryStreams &&
461 properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers &&
462 properties.transformFeedbackQueries && properties.transformFeedbackDraw) {
463 extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
464 ext_transform_feedback = true;
465 }
466 }
467
442 return extensions; 468 return extensions;
443} 469}
444 470
@@ -467,8 +493,7 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
467 493
468void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { 494void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
469 const auto supported_features{physical.getFeatures(dldi)}; 495 const auto supported_features{physical.getFeatures(dldi)};
470 is_shader_storage_img_read_without_format_supported = 496 is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
471 supported_features.shaderStorageImageReadWithoutFormat;
472 is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); 497 is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
473} 498}
474 499
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 2c27ad730..6e656517f 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -122,11 +122,6 @@ public:
122 return properties.limits.maxPushConstantsSize; 122 return properties.limits.maxPushConstantsSize;
123 } 123 }
124 124
125 /// Returns true if Shader storage Image Read Without Format supported.
126 bool IsShaderStorageImageReadWithoutFormatSupported() const {
127 return is_shader_storage_img_read_without_format_supported;
128 }
129
130 /// Returns true if ASTC is natively supported. 125 /// Returns true if ASTC is natively supported.
131 bool IsOptimalAstcSupported() const { 126 bool IsOptimalAstcSupported() const {
132 return is_optimal_astc_supported; 127 return is_optimal_astc_supported;
@@ -147,6 +142,11 @@ public:
147 return (guest_warp_stages & stage) != vk::ShaderStageFlags{}; 142 return (guest_warp_stages & stage) != vk::ShaderStageFlags{};
148 } 143 }
149 144
145 /// Returns true if formatless image load is supported.
146 bool IsFormatlessImageLoadSupported() const {
147 return is_formatless_image_load_supported;
148 }
149
150 /// Returns true if the device supports VK_EXT_scalar_block_layout. 150 /// Returns true if the device supports VK_EXT_scalar_block_layout.
151 bool IsKhrUniformBufferStandardLayoutSupported() const { 151 bool IsKhrUniformBufferStandardLayoutSupported() const {
152 return khr_uniform_buffer_standard_layout; 152 return khr_uniform_buffer_standard_layout;
@@ -167,6 +167,11 @@ public:
167 return ext_shader_viewport_index_layer; 167 return ext_shader_viewport_index_layer;
168 } 168 }
169 169
170 /// Returns true if the device supports VK_EXT_transform_feedback.
171 bool IsExtTransformFeedbackSupported() const {
172 return ext_transform_feedback;
173 }
174
170 /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints. 175 /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints.
171 bool IsNvDeviceDiagnosticCheckpoints() const { 176 bool IsNvDeviceDiagnosticCheckpoints() const {
172 return nv_device_diagnostic_checkpoints; 177 return nv_device_diagnostic_checkpoints;
@@ -214,26 +219,26 @@ private:
214 static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( 219 static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties(
215 const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); 220 const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
216 221
217 const vk::PhysicalDevice physical; ///< Physical device. 222 const vk::PhysicalDevice physical; ///< Physical device.
218 vk::DispatchLoaderDynamic dld; ///< Device function pointers. 223 vk::DispatchLoaderDynamic dld; ///< Device function pointers.
219 vk::PhysicalDeviceProperties properties; ///< Device properties. 224 vk::PhysicalDeviceProperties properties; ///< Device properties.
220 UniqueDevice logical; ///< Logical device. 225 UniqueDevice logical; ///< Logical device.
221 vk::Queue graphics_queue; ///< Main graphics queue. 226 vk::Queue graphics_queue; ///< Main graphics queue.
222 vk::Queue present_queue; ///< Main present queue. 227 vk::Queue present_queue; ///< Main present queue.
223 u32 graphics_family{}; ///< Main graphics queue family index. 228 u32 graphics_family{}; ///< Main graphics queue family index.
224 u32 present_family{}; ///< Main present queue family index. 229 u32 present_family{}; ///< Main present queue family index.
225 vk::DriverIdKHR driver_id{}; ///< Driver ID. 230 vk::DriverIdKHR driver_id{}; ///< Driver ID.
226 vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. 231 vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed
227 bool is_optimal_astc_supported{}; ///< Support for native ASTC. 232 bool is_optimal_astc_supported{}; ///< Support for native ASTC.
228 bool is_float16_supported{}; ///< Support for float16 arithmetics. 233 bool is_float16_supported{}; ///< Support for float16 arithmetics.
229 bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. 234 bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
235 bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
230 bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. 236 bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
231 bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. 237 bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
232 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. 238 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
233 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. 239 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
240 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
234 bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. 241 bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints.
235 bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage
236 ///< image read without format
237 242
238 // Telemetry parameters 243 // Telemetry parameters
239 std::string vendor_name; ///< Device's driver name. 244 std::string vendor_name; ///< Device's driver name.
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 144e1e007..056ef495c 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -161,8 +161,8 @@ CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stag
161 GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, 161 GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
162 ProgramCode program_code, u32 main_offset) 162 ProgramCode program_code, u32 main_offset)
163 : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, 163 : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
164 program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)}, 164 program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)},
165 shader_ir{this->program_code, main_offset, compiler_settings, locker}, 165 shader_ir{this->program_code, main_offset, compiler_settings, registry},
166 entries{GenerateShaderEntries(shader_ir)} {} 166 entries{GenerateShaderEntries(shader_ir)} {}
167 167
168CachedShader::~CachedShader() = default; 168CachedShader::~CachedShader() = default;
@@ -273,9 +273,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
273 specialization.workgroup_size = key.workgroup_size; 273 specialization.workgroup_size = key.workgroup_size;
274 specialization.shared_memory_size = key.shared_memory_size; 274 specialization.shared_memory_size = key.shared_memory_size;
275 275
276 const SPIRVShader spirv_shader{ 276 const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
277 Decompile(device, shader->GetIR(), ShaderType::Compute, specialization), 277 shader->GetRegistry(), specialization),
278 shader->GetEntries()}; 278 shader->GetEntries()};
279 entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, 279 entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
280 update_descriptor_queue, spirv_shader); 280 update_descriptor_queue, spirv_shader);
281 return *entry; 281 return *entry;
@@ -324,8 +324,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
324 const auto& gpu = system.GPU().Maxwell3D(); 324 const auto& gpu = system.GPU().Maxwell3D();
325 325
326 Specialization specialization; 326 Specialization specialization;
327 specialization.primitive_topology = fixed_state.input_assembly.topology; 327 if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) {
328 if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) {
329 ASSERT(fixed_state.input_assembly.point_size != 0.0f); 328 ASSERT(fixed_state.input_assembly.point_size != 0.0f);
330 specialization.point_size = fixed_state.input_assembly.point_size; 329 specialization.point_size = fixed_state.input_assembly.point_size;
331 } 330 }
@@ -333,9 +332,6 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
333 specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; 332 specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
334 } 333 }
335 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; 334 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
336 specialization.tessellation.primitive = fixed_state.tessellation.primitive;
337 specialization.tessellation.spacing = fixed_state.tessellation.spacing;
338 specialization.tessellation.clockwise = fixed_state.tessellation.clockwise;
339 335
340 SPIRVProgram program; 336 SPIRVProgram program;
341 std::vector<vk::DescriptorSetLayoutBinding> bindings; 337 std::vector<vk::DescriptorSetLayoutBinding> bindings;
@@ -356,8 +352,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
356 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 352 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
357 const auto program_type = GetShaderType(program_enum); 353 const auto program_type = GetShaderType(program_enum);
358 const auto& entries = shader->GetEntries(); 354 const auto& entries = shader->GetEntries();
359 program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization), 355 program[stage] = {
360 entries}; 356 Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
357 entries};
361 358
362 if (program_enum == Maxwell::ShaderProgram::VertexA) { 359 if (program_enum == Maxwell::ShaderProgram::VertexA) {
363 // VertexB was combined with VertexA, so we skip the VertexB iteration 360 // VertexB was combined with VertexA, so we skip the VertexB iteration
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 92a670cc7..21340c9a4 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -25,7 +25,7 @@
25#include "video_core/renderer_vulkan/vk_renderpass_cache.h" 25#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
26#include "video_core/renderer_vulkan/vk_resource_manager.h" 26#include "video_core/renderer_vulkan/vk_resource_manager.h"
27#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 27#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
28#include "video_core/shader/const_buffer_locker.h" 28#include "video_core/shader/registry.h"
29#include "video_core/shader/shader_ir.h" 29#include "video_core/shader/shader_ir.h"
30#include "video_core/surface.h" 30#include "video_core/surface.h"
31 31
@@ -132,6 +132,10 @@ public:
132 return shader_ir; 132 return shader_ir;
133 } 133 }
134 134
135 const VideoCommon::Shader::Registry& GetRegistry() const {
136 return registry;
137 }
138
135 const VideoCommon::Shader::ShaderIR& GetIR() const { 139 const VideoCommon::Shader::ShaderIR& GetIR() const {
136 return shader_ir; 140 return shader_ir;
137 } 141 }
@@ -147,7 +151,7 @@ private:
147 GPUVAddr gpu_addr{}; 151 GPUVAddr gpu_addr{};
148 VAddr cpu_addr{}; 152 VAddr cpu_addr{};
149 ProgramCode program_code; 153 ProgramCode program_code;
150 VideoCommon::Shader::ConstBufferLocker locker; 154 VideoCommon::Shader::Registry registry;
151 VideoCommon::Shader::ShaderIR shader_ir; 155 VideoCommon::Shader::ShaderIR shader_ir;
152 ShaderEntries entries; 156 ShaderEntries entries;
153}; 157};
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 2bcb17b56..f889019c1 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -347,6 +347,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
347 [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); 347 [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); });
348 } 348 }
349 349
350 BeginTransformFeedback();
351
350 const auto pipeline_layout = pipeline.GetLayout(); 352 const auto pipeline_layout = pipeline.GetLayout();
351 const auto descriptor_set = pipeline.CommitDescriptorSet(); 353 const auto descriptor_set = pipeline.CommitDescriptorSet();
352 scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { 354 scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) {
@@ -356,6 +358,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
356 } 358 }
357 draw_params.Draw(cmdbuf, dld); 359 draw_params.Draw(cmdbuf, dld);
358 }); 360 });
361
362 EndTransformFeedback();
359} 363}
360 364
361void RasterizerVulkan::Clear() { 365void RasterizerVulkan::Clear() {
@@ -738,6 +742,44 @@ void RasterizerVulkan::UpdateDynamicStates() {
738 UpdateStencilFaces(regs); 742 UpdateStencilFaces(regs);
739} 743}
740 744
745void RasterizerVulkan::BeginTransformFeedback() {
746 const auto& regs = system.GPU().Maxwell3D().regs;
747 if (regs.tfb_enabled == 0) {
748 return;
749 }
750
751 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
752 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
753 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
754
755 UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable);
756 UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable);
757 UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable);
758
759 const auto& binding = regs.tfb_bindings[0];
760 UNIMPLEMENTED_IF(binding.buffer_enable == 0);
761 UNIMPLEMENTED_IF(binding.buffer_offset != 0);
762
763 const GPUVAddr gpu_addr = binding.Address();
764 const std::size_t size = binding.buffer_size;
765 const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
766
767 scheduler.Record([buffer = *buffer, offset = offset, size](auto cmdbuf, auto& dld) {
768 cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld);
769 cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld);
770 });
771}
772
773void RasterizerVulkan::EndTransformFeedback() {
774 const auto& regs = system.GPU().Maxwell3D().regs;
775 if (regs.tfb_enabled == 0) {
776 return;
777 }
778
779 scheduler.Record(
780 [](auto cmdbuf, auto& dld) { cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld); });
781}
782
741void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, 783void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
742 BufferBindings& buffer_bindings) { 784 BufferBindings& buffer_bindings) {
743 const auto& regs = system.GPU().Maxwell3D().regs; 785 const auto& regs = system.GPU().Maxwell3D().regs;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 96ea05f0a..b2e73d98d 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -169,6 +169,10 @@ private:
169 169
170 void UpdateDynamicStates(); 170 void UpdateDynamicStates();
171 171
172 void BeginTransformFeedback();
173
174 void EndTransformFeedback();
175
172 bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); 176 bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
173 177
174 void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, 178 void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index cfcca5af0..b2c298051 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -5,7 +5,9 @@
5#include <functional> 5#include <functional>
6#include <limits> 6#include <limits>
7#include <map> 7#include <map>
8#include <optional>
8#include <type_traits> 9#include <type_traits>
10#include <unordered_map>
9#include <utility> 11#include <utility>
10 12
11#include <fmt/format.h> 13#include <fmt/format.h>
@@ -24,6 +26,7 @@
24#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 26#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
25#include "video_core/shader/node.h" 27#include "video_core/shader/node.h"
26#include "video_core/shader/shader_ir.h" 28#include "video_core/shader/shader_ir.h"
29#include "video_core/shader/transform_feedback.h"
27 30
28namespace Vulkan { 31namespace Vulkan {
29 32
@@ -93,6 +96,12 @@ struct VertexIndices {
93 std::optional<u32> clip_distances; 96 std::optional<u32> clip_distances;
94}; 97};
95 98
99struct GenericVaryingDescription {
100 Id id = nullptr;
101 u32 first_element = 0;
102 bool is_scalar = false;
103};
104
96spv::Dim GetSamplerDim(const Sampler& sampler) { 105spv::Dim GetSamplerDim(const Sampler& sampler) {
97 ASSERT(!sampler.IsBuffer()); 106 ASSERT(!sampler.IsBuffer());
98 switch (sampler.GetType()) { 107 switch (sampler.GetType()) {
@@ -266,9 +275,13 @@ bool IsPrecise(Operation operand) {
266class SPIRVDecompiler final : public Sirit::Module { 275class SPIRVDecompiler final : public Sirit::Module {
267public: 276public:
268 explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage, 277 explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage,
269 const Specialization& specialization) 278 const Registry& registry, const Specialization& specialization)
270 : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()}, 279 : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()},
271 specialization{specialization} { 280 registry{registry}, specialization{specialization} {
281 if (stage != ShaderType::Compute) {
282 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
283 }
284
272 AddCapability(spv::Capability::Shader); 285 AddCapability(spv::Capability::Shader);
273 AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); 286 AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
274 AddCapability(spv::Capability::ImageQuery); 287 AddCapability(spv::Capability::ImageQuery);
@@ -286,6 +299,15 @@ public:
286 AddExtension("SPV_KHR_variable_pointers"); 299 AddExtension("SPV_KHR_variable_pointers");
287 AddExtension("SPV_KHR_shader_draw_parameters"); 300 AddExtension("SPV_KHR_shader_draw_parameters");
288 301
302 if (!transform_feedback.empty()) {
303 if (device.IsExtTransformFeedbackSupported()) {
304 AddCapability(spv::Capability::TransformFeedback);
305 } else {
306 LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not "
307 "supported on this device");
308 }
309 }
310
289 if (ir.UsesLayer() || ir.UsesViewportIndex()) { 311 if (ir.UsesLayer() || ir.UsesViewportIndex()) {
290 if (ir.UsesViewportIndex()) { 312 if (ir.UsesViewportIndex()) {
291 AddCapability(spv::Capability::MultiViewport); 313 AddCapability(spv::Capability::MultiViewport);
@@ -296,7 +318,7 @@ public:
296 } 318 }
297 } 319 }
298 320
299 if (device.IsShaderStorageImageReadWithoutFormatSupported()) { 321 if (device.IsFormatlessImageLoadSupported()) {
300 AddCapability(spv::Capability::StorageImageReadWithoutFormat); 322 AddCapability(spv::Capability::StorageImageReadWithoutFormat);
301 } 323 }
302 324
@@ -318,25 +340,29 @@ public:
318 AddExecutionMode(main, spv::ExecutionMode::OutputVertices, 340 AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
319 header.common2.threads_per_input_primitive); 341 header.common2.threads_per_input_primitive);
320 break; 342 break;
321 case ShaderType::TesselationEval: 343 case ShaderType::TesselationEval: {
344 const auto& info = registry.GetGraphicsInfo();
322 AddCapability(spv::Capability::Tessellation); 345 AddCapability(spv::Capability::Tessellation);
323 AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); 346 AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces);
324 AddExecutionMode(main, GetExecutionMode(specialization.tessellation.primitive)); 347 AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive));
325 AddExecutionMode(main, GetExecutionMode(specialization.tessellation.spacing)); 348 AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing));
326 AddExecutionMode(main, specialization.tessellation.clockwise 349 AddExecutionMode(main, info.tessellation_clockwise
327 ? spv::ExecutionMode::VertexOrderCw 350 ? spv::ExecutionMode::VertexOrderCw
328 : spv::ExecutionMode::VertexOrderCcw); 351 : spv::ExecutionMode::VertexOrderCcw);
329 break; 352 break;
330 case ShaderType::Geometry: 353 }
354 case ShaderType::Geometry: {
355 const auto& info = registry.GetGraphicsInfo();
331 AddCapability(spv::Capability::Geometry); 356 AddCapability(spv::Capability::Geometry);
332 AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); 357 AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces);
333 AddExecutionMode(main, GetExecutionMode(specialization.primitive_topology)); 358 AddExecutionMode(main, GetExecutionMode(info.primitive_topology));
334 AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); 359 AddExecutionMode(main, GetExecutionMode(header.common3.output_topology));
335 AddExecutionMode(main, spv::ExecutionMode::OutputVertices, 360 AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
336 header.common4.max_output_vertices); 361 header.common4.max_output_vertices);
337 // TODO(Rodrigo): Where can we get this info from? 362 // TODO(Rodrigo): Where can we get this info from?
338 AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); 363 AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U);
339 break; 364 break;
365 }
340 case ShaderType::Fragment: 366 case ShaderType::Fragment:
341 AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); 367 AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces);
342 AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); 368 AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
@@ -545,7 +571,8 @@ private:
545 if (stage != ShaderType::Geometry) { 571 if (stage != ShaderType::Geometry) {
546 return; 572 return;
547 } 573 }
548 const u32 num_input = GetNumPrimitiveTopologyVertices(specialization.primitive_topology); 574 const auto& info = registry.GetGraphicsInfo();
575 const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology);
549 DeclareInputVertexArray(num_input); 576 DeclareInputVertexArray(num_input);
550 DeclareOutputVertex(); 577 DeclareOutputVertex();
551 } 578 }
@@ -742,12 +769,34 @@ private:
742 } 769 }
743 770
744 void DeclareOutputAttributes() { 771 void DeclareOutputAttributes() {
772 if (stage == ShaderType::Compute || stage == ShaderType::Fragment) {
773 return;
774 }
775
776 UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex);
745 for (const auto index : ir.GetOutputAttributes()) { 777 for (const auto index : ir.GetOutputAttributes()) {
746 if (!IsGenericAttribute(index)) { 778 if (!IsGenericAttribute(index)) {
747 continue; 779 continue;
748 } 780 }
749 const u32 location = GetGenericAttributeLocation(index); 781 DeclareOutputAttribute(index);
750 Id type = t_float4; 782 }
783 }
784
785 void DeclareOutputAttribute(Attribute::Index index) {
786 static constexpr std::string_view swizzle = "xyzw";
787
788 const u32 location = GetGenericAttributeLocation(index);
789 u8 element = 0;
790 while (element < 4) {
791 const std::size_t remainder = 4 - element;
792
793 std::size_t num_components = remainder;
794 const std::optional tfb = GetTransformFeedbackInfo(index, element);
795 if (tfb) {
796 num_components = tfb->components;
797 }
798
799 Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1);
751 Id varying_default = v_varying_default; 800 Id varying_default = v_varying_default;
752 if (IsOutputAttributeArray()) { 801 if (IsOutputAttributeArray()) {
753 const u32 num = GetNumOutputVertices(); 802 const u32 num = GetNumOutputVertices();
@@ -760,13 +809,45 @@ private:
760 } 809 }
761 type = TypePointer(spv::StorageClass::Output, type); 810 type = TypePointer(spv::StorageClass::Output, type);
762 811
812 std::string name = fmt::format("out_attr{}", location);
813 if (num_components < 4 || element > 0) {
814 name = fmt::format("{}_{}", name, swizzle.substr(element, num_components));
815 }
816
763 const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); 817 const Id id = OpVariable(type, spv::StorageClass::Output, varying_default);
764 Name(AddGlobalVariable(id), fmt::format("out_attr{}", location)); 818 Name(AddGlobalVariable(id), name);
765 output_attributes.emplace(index, id); 819
820 GenericVaryingDescription description;
821 description.id = id;
822 description.first_element = element;
823 description.is_scalar = num_components == 1;
824 for (u32 i = 0; i < num_components; ++i) {
825 const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i);
826 output_attributes.emplace(offset, description);
827 }
766 interfaces.push_back(id); 828 interfaces.push_back(id);
767 829
768 Decorate(id, spv::Decoration::Location, location); 830 Decorate(id, spv::Decoration::Location, location);
831 if (element > 0) {
832 Decorate(id, spv::Decoration::Component, static_cast<u32>(element));
833 }
834 if (tfb && device.IsExtTransformFeedbackSupported()) {
835 Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer));
836 Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride));
837 Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset));
838 }
839
840 element += static_cast<u8>(num_components);
841 }
842 }
843
844 std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) {
845 const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
846 const auto it = transform_feedback.find(location);
847 if (it == transform_feedback.end()) {
848 return {};
769 } 849 }
850 return it->second;
770 } 851 }
771 852
772 u32 DeclareConstantBuffers(u32 binding) { 853 u32 DeclareConstantBuffers(u32 binding) {
@@ -898,7 +979,7 @@ private:
898 u32 GetNumInputVertices() const { 979 u32 GetNumInputVertices() const {
899 switch (stage) { 980 switch (stage) {
900 case ShaderType::Geometry: 981 case ShaderType::Geometry:
901 return GetNumPrimitiveTopologyVertices(specialization.primitive_topology); 982 return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology);
902 case ShaderType::TesselationControl: 983 case ShaderType::TesselationControl:
903 case ShaderType::TesselationEval: 984 case ShaderType::TesselationEval:
904 return NumInputPatches; 985 return NumInputPatches;
@@ -1346,8 +1427,14 @@ private:
1346 } 1427 }
1347 default: 1428 default:
1348 if (IsGenericAttribute(attribute)) { 1429 if (IsGenericAttribute(attribute)) {
1349 const Id composite = output_attributes.at(attribute); 1430 const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element);
1350 return {ArrayPass(t_out_float, composite, {element}), Type::Float}; 1431 const GenericVaryingDescription description = output_attributes.at(offset);
1432 const Id composite = description.id;
1433 std::vector<u32> indices;
1434 if (!description.is_scalar) {
1435 indices.push_back(element - description.first_element);
1436 }
1437 return {ArrayPass(t_out_float, composite, indices), Type::Float};
1351 } 1438 }
1352 UNIMPLEMENTED_MSG("Unhandled output attribute: {}", 1439 UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
1353 static_cast<u32>(attribute)); 1440 static_cast<u32>(attribute));
@@ -1793,7 +1880,7 @@ private:
1793 } 1880 }
1794 1881
1795 Expression ImageLoad(Operation operation) { 1882 Expression ImageLoad(Operation operation) {
1796 if (!device.IsShaderStorageImageReadWithoutFormatSupported()) { 1883 if (!device.IsFormatlessImageLoadSupported()) {
1797 return {v_float_zero, Type::Float}; 1884 return {v_float_zero, Type::Float};
1798 } 1885 }
1799 1886
@@ -2258,11 +2345,11 @@ private:
2258 std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const { 2345 std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const {
2259 switch (type) { 2346 switch (type) {
2260 case Type::Float: 2347 case Type::Float:
2261 return {nullptr, t_float2, t_float3, t_float4}; 2348 return {t_float, t_float2, t_float3, t_float4};
2262 case Type::Int: 2349 case Type::Int:
2263 return {nullptr, t_int2, t_int3, t_int4}; 2350 return {t_int, t_int2, t_int3, t_int4};
2264 case Type::Uint: 2351 case Type::Uint:
2265 return {nullptr, t_uint2, t_uint3, t_uint4}; 2352 return {t_uint, t_uint2, t_uint3, t_uint4};
2266 default: 2353 default:
2267 UNIMPLEMENTED(); 2354 UNIMPLEMENTED();
2268 return {}; 2355 return {};
@@ -2495,7 +2582,9 @@ private:
2495 const ShaderIR& ir; 2582 const ShaderIR& ir;
2496 const ShaderType stage; 2583 const ShaderType stage;
2497 const Tegra::Shader::Header header; 2584 const Tegra::Shader::Header header;
2585 const Registry& registry;
2498 const Specialization& specialization; 2586 const Specialization& specialization;
2587 std::unordered_map<u8, VaryingTFB> transform_feedback;
2499 2588
2500 const Id t_void = Name(TypeVoid(), "void"); 2589 const Id t_void = Name(TypeVoid(), "void");
2501 2590
@@ -2584,7 +2673,7 @@ private:
2584 Id shared_memory{}; 2673 Id shared_memory{};
2585 std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{}; 2674 std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
2586 std::map<Attribute::Index, Id> input_attributes; 2675 std::map<Attribute::Index, Id> input_attributes;
2587 std::map<Attribute::Index, Id> output_attributes; 2676 std::unordered_map<u8, GenericVaryingDescription> output_attributes;
2588 std::map<u32, Id> constant_buffers; 2677 std::map<u32, Id> constant_buffers;
2589 std::map<GlobalMemoryBase, Id> global_buffers; 2678 std::map<GlobalMemoryBase, Id> global_buffers;
2590 std::map<u32, TexelBuffer> texel_buffers; 2679 std::map<u32, TexelBuffer> texel_buffers;
@@ -2870,8 +2959,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
2870} 2959}
2871 2960
2872std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, 2961std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
2873 ShaderType stage, const Specialization& specialization) { 2962 ShaderType stage, const VideoCommon::Shader::Registry& registry,
2874 return SPIRVDecompiler(device, ir, stage, specialization).Assemble(); 2963 const Specialization& specialization) {
2964 return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble();
2875} 2965}
2876 2966
2877} // namespace Vulkan 2967} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index f5dc14d9e..ffea4709e 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -15,6 +15,7 @@
15#include "common/common_types.h" 15#include "common/common_types.h"
16#include "video_core/engines/maxwell_3d.h" 16#include "video_core/engines/maxwell_3d.h"
17#include "video_core/engines/shader_type.h" 17#include "video_core/engines/shader_type.h"
18#include "video_core/shader/registry.h"
18#include "video_core/shader/shader_ir.h" 19#include "video_core/shader/shader_ir.h"
19 20
20namespace Vulkan { 21namespace Vulkan {
@@ -91,17 +92,9 @@ struct Specialization final {
91 u32 shared_memory_size{}; 92 u32 shared_memory_size{};
92 93
93 // Graphics specific 94 // Graphics specific
94 Maxwell::PrimitiveTopology primitive_topology{};
95 std::optional<float> point_size{}; 95 std::optional<float> point_size{};
96 std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; 96 std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
97 bool ndc_minus_one_to_one{}; 97 bool ndc_minus_one_to_one{};
98
99 // Tessellation specific
100 struct {
101 Maxwell::TessellationPrimitive primitive{};
102 Maxwell::TessellationSpacing spacing{};
103 bool clockwise{};
104 } tessellation;
105}; 98};
106// Old gcc versions don't consider this trivially copyable. 99// Old gcc versions don't consider this trivially copyable.
107// static_assert(std::is_trivially_copyable_v<Specialization>); 100// static_assert(std::is_trivially_copyable_v<Specialization>);
@@ -114,6 +107,8 @@ struct SPIRVShader {
114ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); 107ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
115 108
116std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, 109std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
117 Tegra::Engines::ShaderType stage, const Specialization& specialization); 110 Tegra::Engines::ShaderType stage,
111 const VideoCommon::Shader::Registry& registry,
112 const Specialization& specialization);
118 113
119} // namespace Vulkan 114} // namespace Vulkan
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp
deleted file mode 100644
index 0638be8cb..000000000
--- a/src/video_core/shader/const_buffer_locker.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <tuple>
7
8#include "common/common_types.h"
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/engines/shader_type.h"
11#include "video_core/shader/const_buffer_locker.h"
12
13namespace VideoCommon::Shader {
14
15using Tegra::Engines::SamplerDescriptor;
16
17ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage)
18 : stage{shader_stage} {}
19
20ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
21 Tegra::Engines::ConstBufferEngineInterface& engine)
22 : stage{shader_stage}, engine{&engine} {}
23
24ConstBufferLocker::~ConstBufferLocker() = default;
25
26std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) {
27 const std::pair<u32, u32> key = {buffer, offset};
28 const auto iter = keys.find(key);
29 if (iter != keys.end()) {
30 return iter->second;
31 }
32 if (!engine) {
33 return std::nullopt;
34 }
35 const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
36 keys.emplace(key, value);
37 return value;
38}
39
40std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) {
41 const u32 key = offset;
42 const auto iter = bound_samplers.find(key);
43 if (iter != bound_samplers.end()) {
44 return iter->second;
45 }
46 if (!engine) {
47 return std::nullopt;
48 }
49 const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
50 bound_samplers.emplace(key, value);
51 return value;
52}
53
54std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler(
55 u32 buffer, u32 offset) {
56 const std::pair key = {buffer, offset};
57 const auto iter = bindless_samplers.find(key);
58 if (iter != bindless_samplers.end()) {
59 return iter->second;
60 }
61 if (!engine) {
62 return std::nullopt;
63 }
64 const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
65 bindless_samplers.emplace(key, value);
66 return value;
67}
68
69std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() {
70 if (bound_buffer_saved) {
71 return bound_buffer;
72 }
73 if (!engine) {
74 return std::nullopt;
75 }
76 bound_buffer_saved = true;
77 bound_buffer = engine->GetBoundBuffer();
78 return bound_buffer;
79}
80
81void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
82 keys.insert_or_assign({buffer, offset}, value);
83}
84
85void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
86 bound_samplers.insert_or_assign(offset, sampler);
87}
88
89void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
90 bindless_samplers.insert_or_assign({buffer, offset}, sampler);
91}
92
93void ConstBufferLocker::SetBoundBuffer(u32 buffer) {
94 bound_buffer_saved = true;
95 bound_buffer = buffer;
96}
97
98bool ConstBufferLocker::IsConsistent() const {
99 if (!engine) {
100 return false;
101 }
102 return std::all_of(keys.begin(), keys.end(),
103 [this](const auto& pair) {
104 const auto [cbuf, offset] = pair.first;
105 const auto value = pair.second;
106 return value == engine->AccessConstBuffer32(stage, cbuf, offset);
107 }) &&
108 std::all_of(bound_samplers.begin(), bound_samplers.end(),
109 [this](const auto& sampler) {
110 const auto [key, value] = sampler;
111 return value == engine->AccessBoundSampler(stage, key);
112 }) &&
113 std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
114 [this](const auto& sampler) {
115 const auto [cbuf, offset] = sampler.first;
116 const auto value = sampler.second;
117 return value == engine->AccessBindlessSampler(stage, cbuf, offset);
118 });
119}
120
121bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const {
122 return std::tie(keys, bound_samplers, bindless_samplers) ==
123 std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
124}
125
126} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
deleted file mode 100644
index d3ea11087..000000000
--- a/src/video_core/shader/const_buffer_locker.h
+++ /dev/null
@@ -1,103 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <unordered_map>
9#include "common/common_types.h"
10#include "common/hash.h"
11#include "video_core/engines/const_buffer_engine_interface.h"
12#include "video_core/engines/shader_type.h"
13#include "video_core/guest_driver.h"
14
15namespace VideoCommon::Shader {
16
17using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
18using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
19using BindlessSamplerMap =
20 std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
21
22/**
23 * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader
24 * compiler. with it, the shader can obtain required data from GPU state and store it for disk
25 * shader compilation.
26 */
27class ConstBufferLocker {
28public:
29 explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage);
30
31 explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
32 Tegra::Engines::ConstBufferEngineInterface& engine);
33
34 ~ConstBufferLocker();
35
36 /// Retrieves a key from the locker, if it's registered, it will give the registered value, if
37 /// not it will obtain it from maxwell3d and register it.
38 std::optional<u32> ObtainKey(u32 buffer, u32 offset);
39
40 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
41
42 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
43
44 std::optional<u32> ObtainBoundBuffer();
45
46 /// Inserts a key.
47 void InsertKey(u32 buffer, u32 offset, u32 value);
48
49 /// Inserts a bound sampler key.
50 void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
51
52 /// Inserts a bindless sampler key.
53 void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
54
55 /// Set the bound buffer for this locker.
56 void SetBoundBuffer(u32 buffer);
57
58 /// Checks keys and samplers against engine's current const buffers. Returns true if they are
59 /// the same value, false otherwise;
60 bool IsConsistent() const;
61
62 /// Returns true if the keys are equal to the other ones in the locker.
63 bool HasEqualKeys(const ConstBufferLocker& rhs) const;
64
65 /// Gives an getter to the const buffer keys in the database.
66 const KeyMap& GetKeys() const {
67 return keys;
68 }
69
70 /// Gets samplers database.
71 const BoundSamplerMap& GetBoundSamplers() const {
72 return bound_samplers;
73 }
74
75 /// Gets bindless samplers database.
76 const BindlessSamplerMap& GetBindlessSamplers() const {
77 return bindless_samplers;
78 }
79
80 /// Gets bound buffer used on this shader
81 u32 GetBoundBuffer() const {
82 return bound_buffer;
83 }
84
85 /// Obtains access to the guest driver's profile.
86 VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const {
87 if (engine) {
88 return &engine->AccessGuestDriverProfile();
89 }
90 return nullptr;
91 }
92
93private:
94 const Tegra::Engines::ShaderType stage;
95 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
96 KeyMap keys;
97 BoundSamplerMap bound_samplers;
98 BindlessSamplerMap bindless_samplers;
99 bool bound_buffer_saved{};
100 u32 bound_buffer{};
101};
102
103} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 0229733b6..2e2711350 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -13,6 +13,7 @@
13#include "common/common_types.h" 13#include "common/common_types.h"
14#include "video_core/shader/ast.h" 14#include "video_core/shader/ast.h"
15#include "video_core/shader/control_flow.h" 15#include "video_core/shader/control_flow.h"
16#include "video_core/shader/registry.h"
16#include "video_core/shader/shader_ir.h" 17#include "video_core/shader/shader_ir.h"
17 18
18namespace VideoCommon::Shader { 19namespace VideoCommon::Shader {
@@ -64,11 +65,11 @@ struct BlockInfo {
64}; 65};
65 66
66struct CFGRebuildState { 67struct CFGRebuildState {
67 explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) 68 explicit CFGRebuildState(const ProgramCode& program_code, u32 start, Registry& registry)
68 : program_code{program_code}, locker{locker}, start{start} {} 69 : program_code{program_code}, registry{registry}, start{start} {}
69 70
70 const ProgramCode& program_code; 71 const ProgramCode& program_code;
71 ConstBufferLocker& locker; 72 Registry& registry;
72 u32 start{}; 73 u32 start{};
73 std::vector<BlockInfo> block_info; 74 std::vector<BlockInfo> block_info;
74 std::list<u32> inspect_queries; 75 std::list<u32> inspect_queries;
@@ -438,7 +439,7 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
438 const s32 pc_target = offset + result.relative_position; 439 const s32 pc_target = offset + result.relative_position;
439 std::vector<CaseBranch> branches; 440 std::vector<CaseBranch> branches;
440 for (u32 i = 0; i < result.entries; i++) { 441 for (u32 i = 0; i < result.entries; i++) {
441 auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4); 442 auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4);
442 if (!key) { 443 if (!key) {
443 return {ParseResult::AbnormalFlow, parse_info}; 444 return {ParseResult::AbnormalFlow, parse_info};
444 } 445 }
@@ -656,14 +657,14 @@ void DecompileShader(CFGRebuildState& state) {
656 657
657std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, 658std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
658 const CompilerSettings& settings, 659 const CompilerSettings& settings,
659 ConstBufferLocker& locker) { 660 Registry& registry) {
660 auto result_out = std::make_unique<ShaderCharacteristics>(); 661 auto result_out = std::make_unique<ShaderCharacteristics>();
661 if (settings.depth == CompileDepth::BruteForce) { 662 if (settings.depth == CompileDepth::BruteForce) {
662 result_out->settings.depth = CompileDepth::BruteForce; 663 result_out->settings.depth = CompileDepth::BruteForce;
663 return result_out; 664 return result_out;
664 } 665 }
665 666
666 CFGRebuildState state{program_code, start_address, locker}; 667 CFGRebuildState state{program_code, start_address, registry};
667 // Inspect Code and generate blocks 668 // Inspect Code and generate blocks
668 state.labels.clear(); 669 state.labels.clear();
669 state.labels.emplace(start_address); 670 state.labels.emplace(start_address);
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 5304998b9..62a3510d8 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -12,6 +12,7 @@
12#include "video_core/engines/shader_bytecode.h" 12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/shader/ast.h" 13#include "video_core/shader/ast.h"
14#include "video_core/shader/compiler_settings.h" 14#include "video_core/shader/compiler_settings.h"
15#include "video_core/shader/registry.h"
15#include "video_core/shader/shader_ir.h" 16#include "video_core/shader/shader_ir.h"
16 17
17namespace VideoCommon::Shader { 18namespace VideoCommon::Shader {
@@ -111,6 +112,6 @@ struct ShaderCharacteristics {
111 112
112std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, 113std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
113 const CompilerSettings& settings, 114 const CompilerSettings& settings,
114 ConstBufferLocker& locker); 115 Registry& registry);
115 116
116} // namespace VideoCommon::Shader 117} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 6b697ed5d..87ac9ac6c 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -34,13 +34,9 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
34 return (absolute_offset % SchedPeriod) == 0; 34 return (absolute_offset % SchedPeriod) == 0;
35} 35}
36 36
37void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, 37void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
38 const std::list<Sampler>& used_samplers) { 38 const std::list<Sampler>& used_samplers) {
39 if (gpu_driver == nullptr) { 39 if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
40 LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet");
41 return;
42 }
43 if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) {
44 return; 40 return;
45 } 41 }
46 u32 count{}; 42 u32 count{};
@@ -53,17 +49,13 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
53 bound_offsets.emplace_back(sampler.GetOffset()); 49 bound_offsets.emplace_back(sampler.GetOffset());
54 } 50 }
55 if (count > 1) { 51 if (count > 1) {
56 gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); 52 gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets));
57 } 53 }
58} 54}
59 55
60std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, 56std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
61 VideoCore::GuestDriverProfile* gpu_driver, 57 VideoCore::GuestDriverProfile& gpu_driver,
62 const std::list<Sampler>& used_samplers) { 58 const std::list<Sampler>& used_samplers) {
63 if (gpu_driver == nullptr) {
64 LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet");
65 return std::nullopt;
66 }
67 const u32 base_offset = sampler_to_deduce.GetOffset(); 59 const u32 base_offset = sampler_to_deduce.GetOffset();
68 u32 max_offset{std::numeric_limits<u32>::max()}; 60 u32 max_offset{std::numeric_limits<u32>::max()};
69 for (const auto& sampler : used_samplers) { 61 for (const auto& sampler : used_samplers) {
@@ -77,7 +69,7 @@ std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
77 if (max_offset == std::numeric_limits<u32>::max()) { 69 if (max_offset == std::numeric_limits<u32>::max()) {
78 return std::nullopt; 70 return std::nullopt;
79 } 71 }
80 return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize(); 72 return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize();
81} 73}
82 74
83} // Anonymous namespace 75} // Anonymous namespace
@@ -149,7 +141,7 @@ void ShaderIR::Decode() {
149 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); 141 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
150 142
151 decompiled = false; 143 decompiled = false;
152 auto info = ScanFlow(program_code, main_offset, settings, locker); 144 auto info = ScanFlow(program_code, main_offset, settings, registry);
153 auto& shader_info = *info; 145 auto& shader_info = *info;
154 coverage_begin = shader_info.start; 146 coverage_begin = shader_info.start;
155 coverage_end = shader_info.end; 147 coverage_end = shader_info.end;
@@ -364,7 +356,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
364 356
365void ShaderIR::PostDecode() { 357void ShaderIR::PostDecode() {
366 // Deduce texture handler size if needed 358 // Deduce texture handler size if needed
367 auto gpu_driver = locker.AccessGuestDriverProfile(); 359 auto gpu_driver = registry.AccessGuestDriverProfile();
368 DeduceTextureHandlerSize(gpu_driver, used_samplers); 360 DeduceTextureHandlerSize(gpu_driver, used_samplers);
369 // Deduce Indexed Samplers 361 // Deduce Indexed Samplers
370 if (!uses_indexed_samplers) { 362 if (!uses_indexed_samplers) {
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index bee7d8cad..48350e042 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -12,6 +12,7 @@
12#include "common/logging/log.h" 12#include "common/logging/log.h"
13#include "video_core/engines/shader_bytecode.h" 13#include "video_core/engines/shader_bytecode.h"
14#include "video_core/shader/node_helper.h" 14#include "video_core/shader/node_helper.h"
15#include "video_core/shader/registry.h"
15#include "video_core/shader/shader_ir.h" 16#include "video_core/shader/shader_ir.h"
16 17
17namespace VideoCommon::Shader { 18namespace VideoCommon::Shader {
@@ -359,8 +360,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sample
359 if (sampler_info) { 360 if (sampler_info) {
360 return *sampler_info; 361 return *sampler_info;
361 } 362 }
362 const auto sampler = 363 const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
363 buffer ? locker.ObtainBindlessSampler(*buffer, offset) : locker.ObtainBoundSampler(offset); 364 : registry.ObtainBoundSampler(offset);
364 if (!sampler) { 365 if (!sampler) {
365 LOG_WARNING(HW_GPU, "Unknown sampler info"); 366 LOG_WARNING(HW_GPU, "Unknown sampler info");
366 return SamplerInfo{TextureType::Texture2D, false, false, false}; 367 return SamplerInfo{TextureType::Texture2D, false, false, false};
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
new file mode 100644
index 000000000..af70b3f35
--- /dev/null
+++ b/src/video_core/shader/registry.cpp
@@ -0,0 +1,161 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <tuple>
7
8#include "common/assert.h"
9#include "common/common_types.h"
10#include "video_core/engines/kepler_compute.h"
11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/engines/shader_type.h"
13#include "video_core/shader/registry.h"
14
15namespace VideoCommon::Shader {
16
17using Tegra::Engines::ConstBufferEngineInterface;
18using Tegra::Engines::SamplerDescriptor;
19using Tegra::Engines::ShaderType;
20
21namespace {
22
23GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
24 if (shader_stage == ShaderType::Compute) {
25 return {};
26 }
27 auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine);
28
29 GraphicsInfo info;
30 info.tfb_layouts = graphics.regs.tfb_layouts;
31 info.tfb_varying_locs = graphics.regs.tfb_varying_locs;
32 info.primitive_topology = graphics.regs.draw.topology;
33 info.tessellation_primitive = graphics.regs.tess_mode.prim;
34 info.tessellation_spacing = graphics.regs.tess_mode.spacing;
35 info.tfb_enabled = graphics.regs.tfb_enabled;
36 info.tessellation_clockwise = graphics.regs.tess_mode.cw;
37 return info;
38}
39
40ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
41 if (shader_stage != ShaderType::Compute) {
42 return {};
43 }
44 auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine);
45 const auto& launch = compute.launch_description;
46
47 ComputeInfo info;
48 info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z};
49 info.local_memory_size_in_words = launch.local_pos_alloc;
50 info.shared_memory_size_in_words = launch.shared_alloc;
51 return info;
52}
53
54} // Anonymous namespace
55
56Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info)
57 : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile},
58 bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {}
59
60Registry::Registry(Tegra::Engines::ShaderType shader_stage,
61 Tegra::Engines::ConstBufferEngineInterface& engine)
62 : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()},
63 graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo(
64 shader_stage, engine)} {}
65
66Registry::~Registry() = default;
67
68std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) {
69 const std::pair<u32, u32> key = {buffer, offset};
70 const auto iter = keys.find(key);
71 if (iter != keys.end()) {
72 return iter->second;
73 }
74 if (!engine) {
75 return std::nullopt;
76 }
77 const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
78 keys.emplace(key, value);
79 return value;
80}
81
82std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
83 const u32 key = offset;
84 const auto iter = bound_samplers.find(key);
85 if (iter != bound_samplers.end()) {
86 return iter->second;
87 }
88 if (!engine) {
89 return std::nullopt;
90 }
91 const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
92 bound_samplers.emplace(key, value);
93 return value;
94}
95
96std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
97 u32 offset) {
98 const std::pair key = {buffer, offset};
99 const auto iter = bindless_samplers.find(key);
100 if (iter != bindless_samplers.end()) {
101 return iter->second;
102 }
103 if (!engine) {
104 return std::nullopt;
105 }
106 const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
107 bindless_samplers.emplace(key, value);
108 return value;
109}
110
111void Registry::InsertKey(u32 buffer, u32 offset, u32 value) {
112 keys.insert_or_assign({buffer, offset}, value);
113}
114
115void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
116 bound_samplers.insert_or_assign(offset, sampler);
117}
118
119void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
120 bindless_samplers.insert_or_assign({buffer, offset}, sampler);
121}
122
123bool Registry::IsConsistent() const {
124 if (!engine) {
125 return true;
126 }
127 return std::all_of(keys.begin(), keys.end(),
128 [this](const auto& pair) {
129 const auto [cbuf, offset] = pair.first;
130 const auto value = pair.second;
131 return value == engine->AccessConstBuffer32(stage, cbuf, offset);
132 }) &&
133 std::all_of(bound_samplers.begin(), bound_samplers.end(),
134 [this](const auto& sampler) {
135 const auto [key, value] = sampler;
136 return value == engine->AccessBoundSampler(stage, key);
137 }) &&
138 std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
139 [this](const auto& sampler) {
140 const auto [cbuf, offset] = sampler.first;
141 const auto value = sampler.second;
142 return value == engine->AccessBindlessSampler(stage, cbuf, offset);
143 });
144}
145
146bool Registry::HasEqualKeys(const Registry& rhs) const {
147 return std::tie(keys, bound_samplers, bindless_samplers) ==
148 std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers);
149}
150
151const GraphicsInfo& Registry::GetGraphicsInfo() const {
152 ASSERT(stage != Tegra::Engines::ShaderType::Compute);
153 return graphics_info;
154}
155
156const ComputeInfo& Registry::GetComputeInfo() const {
157 ASSERT(stage == Tegra::Engines::ShaderType::Compute);
158 return compute_info;
159}
160
161} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
new file mode 100644
index 000000000..0c80d35fd
--- /dev/null
+++ b/src/video_core/shader/registry.h
@@ -0,0 +1,137 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <optional>
9#include <type_traits>
10#include <unordered_map>
11#include <utility>
12
13#include "common/common_types.h"
14#include "common/hash.h"
15#include "video_core/engines/const_buffer_engine_interface.h"
16#include "video_core/engines/maxwell_3d.h"
17#include "video_core/engines/shader_type.h"
18#include "video_core/guest_driver.h"
19
20namespace VideoCommon::Shader {
21
22using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
23using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
24using BindlessSamplerMap =
25 std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
26
27struct GraphicsInfo {
28 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
29
30 std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers>
31 tfb_layouts{};
32 std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{};
33 Maxwell::PrimitiveTopology primitive_topology{};
34 Maxwell::TessellationPrimitive tessellation_primitive{};
35 Maxwell::TessellationSpacing tessellation_spacing{};
36 bool tfb_enabled = false;
37 bool tessellation_clockwise = false;
38};
39static_assert(std::is_trivially_copyable_v<GraphicsInfo> &&
40 std::is_standard_layout_v<GraphicsInfo>);
41
42struct ComputeInfo {
43 std::array<u32, 3> workgroup_size{};
44 u32 shared_memory_size_in_words = 0;
45 u32 local_memory_size_in_words = 0;
46};
47static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>);
48
49struct SerializedRegistryInfo {
50 VideoCore::GuestDriverProfile guest_driver_profile;
51 u32 bound_buffer = 0;
52 GraphicsInfo graphics;
53 ComputeInfo compute;
54};
55
56/**
57 * The Registry is a class use to interface the 3D and compute engines with the shader compiler.
58 * With it, the shader can obtain required data from GPU state and store it for disk shader
59 * compilation.
60 */
61class Registry {
62public:
63 explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
64
65 explicit Registry(Tegra::Engines::ShaderType shader_stage,
66 Tegra::Engines::ConstBufferEngineInterface& engine);
67
68 ~Registry();
69
70 /// Retrieves a key from the registry, if it's registered, it will give the registered value, if
71 /// not it will obtain it from maxwell3d and register it.
72 std::optional<u32> ObtainKey(u32 buffer, u32 offset);
73
74 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
75
76 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
77
78 /// Inserts a key.
79 void InsertKey(u32 buffer, u32 offset, u32 value);
80
81 /// Inserts a bound sampler key.
82 void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
83
84 /// Inserts a bindless sampler key.
85 void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
86
87 /// Checks keys and samplers against engine's current const buffers.
88 /// Returns true if they are the same value, false otherwise.
89 bool IsConsistent() const;
90
91 /// Returns true if the keys are equal to the other ones in the registry.
92 bool HasEqualKeys(const Registry& rhs) const;
93
94 /// Returns graphics information from this shader
95 const GraphicsInfo& GetGraphicsInfo() const;
96
97 /// Returns compute information from this shader
98 const ComputeInfo& GetComputeInfo() const;
99
100 /// Gives an getter to the const buffer keys in the database.
101 const KeyMap& GetKeys() const {
102 return keys;
103 }
104
105 /// Gets samplers database.
106 const BoundSamplerMap& GetBoundSamplers() const {
107 return bound_samplers;
108 }
109
110 /// Gets bindless samplers database.
111 const BindlessSamplerMap& GetBindlessSamplers() const {
112 return bindless_samplers;
113 }
114
115 /// Gets bound buffer used on this shader
116 u32 GetBoundBuffer() const {
117 return bound_buffer;
118 }
119
120 /// Obtains access to the guest driver's profile.
121 VideoCore::GuestDriverProfile& AccessGuestDriverProfile() {
122 return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile;
123 }
124
125private:
126 const Tegra::Engines::ShaderType stage;
127 VideoCore::GuestDriverProfile stored_guest_driver_profile;
128 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
129 KeyMap keys;
130 BoundSamplerMap bound_samplers;
131 BindlessSamplerMap bindless_samplers;
132 u32 bound_buffer;
133 GraphicsInfo graphics_info;
134 ComputeInfo compute_info;
135};
136
137} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 3a5d280a9..425927777 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -11,6 +11,7 @@
11#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "video_core/engines/shader_bytecode.h" 12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/shader/node_helper.h" 13#include "video_core/shader/node_helper.h"
14#include "video_core/shader/registry.h"
14#include "video_core/shader/shader_ir.h" 15#include "video_core/shader/shader_ir.h"
15 16
16namespace VideoCommon::Shader { 17namespace VideoCommon::Shader {
@@ -24,8 +25,8 @@ using Tegra::Shader::PredOperation;
24using Tegra::Shader::Register; 25using Tegra::Shader::Register;
25 26
26ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, 27ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
27 ConstBufferLocker& locker) 28 Registry& registry)
28 : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { 29 : program_code{program_code}, main_offset{main_offset}, settings{settings}, registry{registry} {
29 Decode(); 30 Decode();
30 PostDecode(); 31 PostDecode();
31} 32}
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index b0851c3be..dde036b40 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -18,8 +18,8 @@
18#include "video_core/engines/shader_header.h" 18#include "video_core/engines/shader_header.h"
19#include "video_core/shader/ast.h" 19#include "video_core/shader/ast.h"
20#include "video_core/shader/compiler_settings.h" 20#include "video_core/shader/compiler_settings.h"
21#include "video_core/shader/const_buffer_locker.h"
22#include "video_core/shader/node.h" 21#include "video_core/shader/node.h"
22#include "video_core/shader/registry.h"
23 23
24namespace VideoCommon::Shader { 24namespace VideoCommon::Shader {
25 25
@@ -69,7 +69,7 @@ struct GlobalMemoryUsage {
69class ShaderIR final { 69class ShaderIR final {
70public: 70public:
71 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, 71 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
72 ConstBufferLocker& locker); 72 Registry& registry);
73 ~ShaderIR(); 73 ~ShaderIR();
74 74
75 const std::map<u32, NodeBlock>& GetBasicBlocks() const { 75 const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -414,7 +414,7 @@ private:
414 const ProgramCode& program_code; 414 const ProgramCode& program_code;
415 const u32 main_offset; 415 const u32 main_offset;
416 const CompilerSettings settings; 416 const CompilerSettings settings;
417 ConstBufferLocker& locker; 417 Registry& registry;
418 418
419 bool decompiled{}; 419 bool decompiled{};
420 bool disable_flow_stack{}; 420 bool disable_flow_stack{};
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 15e22b9fa..10739b37d 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -81,26 +81,20 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
81 MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); 81 MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
82 return {tracked, track}; 82 return {tracked, track};
83 } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { 83 } else if (const auto operation = std::get_if<OperationNode>(&*offset)) {
84 auto bound_buffer = locker.ObtainBoundBuffer(); 84 const u32 bound_buffer = registry.GetBoundBuffer();
85 if (!bound_buffer) { 85 if (bound_buffer != cbuf->GetIndex()) {
86 return {}; 86 return {};
87 } 87 }
88 if (*bound_buffer != cbuf->GetIndex()) { 88 const auto pair = DecoupleIndirectRead(*operation);
89 return {};
90 }
91 auto pair = DecoupleIndirectRead(*operation);
92 if (!pair) { 89 if (!pair) {
93 return {}; 90 return {};
94 } 91 }
95 auto [gpr, base_offset] = *pair; 92 auto [gpr, base_offset] = *pair;
96 const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); 93 const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
97 auto gpu_driver = locker.AccessGuestDriverProfile(); 94 const auto& gpu_driver = registry.AccessGuestDriverProfile();
98 if (gpu_driver == nullptr) {
99 return {};
100 }
101 const u32 bindless_cv = NewCustomVariable(); 95 const u32 bindless_cv = NewCustomVariable();
102 const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr, 96 const Node op =
103 Immediate(gpu_driver->GetTextureHandlerSize())); 97 Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize()));
104 98
105 const Node cv_node = GetCustomVariable(bindless_cv); 99 const Node cv_node = GetCustomVariable(bindless_cv);
106 Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); 100 Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp
new file mode 100644
index 000000000..22a933761
--- /dev/null
+++ b/src/video_core/shader/transform_feedback.cpp
@@ -0,0 +1,115 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <unordered_map>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/shader/registry.h"
13#include "video_core/shader/transform_feedback.h"
14
15namespace VideoCommon::Shader {
16
17namespace {
18
19using Maxwell = Tegra::Engines::Maxwell3D::Regs;
20
21// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20
22
23/// Attribute offsets that describe a vector
24constexpr std::array VECTORS = {
25 28, // gl_Position
26 32, // Generic 0
27 36, // Generic 1
28 40, // Generic 2
29 44, // Generic 3
30 48, // Generic 4
31 52, // Generic 5
32 56, // Generic 6
33 60, // Generic 7
34 64, // Generic 8
35 68, // Generic 9
36 72, // Generic 10
37 76, // Generic 11
38 80, // Generic 12
39 84, // Generic 13
40 88, // Generic 14
41 92, // Generic 15
42 96, // Generic 16
43 100, // Generic 17
44 104, // Generic 18
45 108, // Generic 19
46 112, // Generic 20
47 116, // Generic 21
48 120, // Generic 22
49 124, // Generic 23
50 128, // Generic 24
51 132, // Generic 25
52 136, // Generic 26
53 140, // Generic 27
54 144, // Generic 28
55 148, // Generic 29
56 152, // Generic 30
57 156, // Generic 31
58 160, // gl_FrontColor
59 164, // gl_FrontSecondaryColor
60 160, // gl_BackColor
61 164, // gl_BackSecondaryColor
62 192, // gl_TexCoord[0]
63 196, // gl_TexCoord[1]
64 200, // gl_TexCoord[2]
65 204, // gl_TexCoord[3]
66 208, // gl_TexCoord[4]
67 212, // gl_TexCoord[5]
68 216, // gl_TexCoord[6]
69 220, // gl_TexCoord[7]
70};
71} // namespace
72
73std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) {
74
75 std::unordered_map<u8, VaryingTFB> tfb;
76
77 for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) {
78 const auto& locations = info.tfb_varying_locs[buffer];
79 const auto& layout = info.tfb_layouts[buffer];
80 const std::size_t varying_count = layout.varying_count;
81
82 std::size_t highest = 0;
83
84 for (std::size_t offset = 0; offset < varying_count; ++offset) {
85 const std::size_t base_offset = offset;
86 const u8 location = locations[offset];
87
88 VaryingTFB varying;
89 varying.buffer = layout.stream;
90 varying.stride = layout.stride;
91 varying.offset = offset * sizeof(u32);
92 varying.components = 1;
93
94 if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) {
95 UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
96
97 const u8 base_index = location / 4;
98 while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
99 ++offset;
100 ++varying.components;
101 }
102 }
103
104 [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second;
105 UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored");
106
107 highest = std::max(highest, (base_offset + varying.components) * sizeof(u32));
108 }
109
110 UNIMPLEMENTED_IF(highest != layout.stride);
111 }
112 return tfb;
113}
114
115} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h
new file mode 100644
index 000000000..77d05f64c
--- /dev/null
+++ b/src/video_core/shader/transform_feedback.h
@@ -0,0 +1,23 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <unordered_map>
8
9#include "common/common_types.h"
10#include "video_core/shader/registry.h"
11
12namespace VideoCommon::Shader {
13
14struct VaryingTFB {
15 std::size_t buffer;
16 std::size_t stride;
17 std::size_t offset;
18 std::size_t components;
19};
20
21std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info);
22
23} // namespace VideoCommon::Shader
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index f00839313..9931c5ef7 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -113,8 +113,10 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
113 params.height = tic.Height(); 113 params.height = tic.Height();
114 params.depth = tic.Depth(); 114 params.depth = tic.Depth();
115 params.pitch = params.is_tiled ? 0 : tic.Pitch(); 115 params.pitch = params.is_tiled ? 0 : tic.Pitch();
116 if (params.target == SurfaceTarget::TextureCubemap || 116 if (params.target == SurfaceTarget::Texture2D && params.depth > 1) {
117 params.target == SurfaceTarget::TextureCubeArray) { 117 params.depth = 1;
118 } else if (params.target == SurfaceTarget::TextureCubemap ||
119 params.target == SurfaceTarget::TextureCubeArray) {
118 params.depth *= 6; 120 params.depth *= 6;
119 } 121 }
120 params.num_levels = tic.max_mip_level + 1; 122 params.num_levels = tic.max_mip_level + 1;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 51373b687..6cdbe63d0 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -104,6 +104,11 @@ public:
104 if (!cache_addr) { 104 if (!cache_addr) {
105 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 105 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
106 } 106 }
107
108 if (!IsTypeCompatible(tic.texture_type, entry)) {
109 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
110 }
111
107 const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; 112 const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
108 const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); 113 const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false);
109 if (guard_samplers) { 114 if (guard_samplers) {
@@ -914,13 +919,15 @@ private:
914 params.width = 1; 919 params.width = 1;
915 params.height = 1; 920 params.height = 1;
916 params.depth = 1; 921 params.depth = 1;
922 if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) {
923 params.depth = 6;
924 }
917 params.pitch = 4; 925 params.pitch = 4;
918 params.num_levels = 1; 926 params.num_levels = 1;
919 params.emulated_levels = 1; 927 params.emulated_levels = 1;
920 params.pixel_format = VideoCore::Surface::PixelFormat::RGBA16F; 928 params.pixel_format = VideoCore::Surface::PixelFormat::R8U;
921 params.type = VideoCore::Surface::SurfaceType::ColorTexture; 929 params.type = VideoCore::Surface::SurfaceType::ColorTexture;
922 auto surface = CreateSurface(0ULL, params); 930 auto surface = CreateSurface(0ULL, params);
923 invalid_memory.clear();
924 invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); 931 invalid_memory.resize(surface->GetHostSizeInBytes(), 0U);
925 surface->UploadTexture(invalid_memory); 932 surface->UploadTexture(invalid_memory);
926 surface->MarkAsModified(false, Tick()); 933 surface->MarkAsModified(false, Tick());
@@ -1082,6 +1089,36 @@ private:
1082 return siblings_table[static_cast<std::size_t>(format)]; 1089 return siblings_table[static_cast<std::size_t>(format)];
1083 } 1090 }
1084 1091
1092 /// Returns true the shader sampler entry is compatible with the TIC texture type.
1093 static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type,
1094 const VideoCommon::Shader::Sampler& entry) {
1095 const auto shader_type = entry.GetType();
1096 switch (tic_type) {
1097 case Tegra::Texture::TextureType::Texture1D:
1098 case Tegra::Texture::TextureType::Texture1DArray:
1099 return shader_type == Tegra::Shader::TextureType::Texture1D;
1100 case Tegra::Texture::TextureType::Texture1DBuffer:
1101 // TODO(Rodrigo): Assume as valid for now
1102 return true;
1103 case Tegra::Texture::TextureType::Texture2D:
1104 case Tegra::Texture::TextureType::Texture2DNoMipmap:
1105 return shader_type == Tegra::Shader::TextureType::Texture2D;
1106 case Tegra::Texture::TextureType::Texture2DArray:
1107 return shader_type == Tegra::Shader::TextureType::Texture2D ||
1108 shader_type == Tegra::Shader::TextureType::TextureCube;
1109 case Tegra::Texture::TextureType::Texture3D:
1110 return shader_type == Tegra::Shader::TextureType::Texture3D;
1111 case Tegra::Texture::TextureType::TextureCubeArray:
1112 case Tegra::Texture::TextureType::TextureCubemap:
1113 if (shader_type == Tegra::Shader::TextureType::TextureCube) {
1114 return true;
1115 }
1116 return shader_type == Tegra::Shader::TextureType::Texture2D && entry.IsArray();
1117 }
1118 UNREACHABLE();
1119 return true;
1120 }
1121
1085 struct FramebufferTargetInfo { 1122 struct FramebufferTargetInfo {
1086 TSurface target; 1123 TSurface target;
1087 TView view; 1124 TView view;
diff --git a/src/yuzu/loading_screen.cpp b/src/yuzu/loading_screen.cpp
index 4f2bfab48..2a6483370 100644
--- a/src/yuzu/loading_screen.cpp
+++ b/src/yuzu/loading_screen.cpp
@@ -34,18 +34,6 @@ constexpr char PROGRESSBAR_STYLE_PREPARE[] = R"(
34QProgressBar {} 34QProgressBar {}
35QProgressBar::chunk {})"; 35QProgressBar::chunk {})";
36 36
37constexpr char PROGRESSBAR_STYLE_DECOMPILE[] = R"(
38QProgressBar {
39 background-color: black;
40 border: 2px solid white;
41 border-radius: 4px;
42 padding: 2px;
43}
44QProgressBar::chunk {
45 background-color: #0ab9e6;
46 width: 1px;
47})";
48
49constexpr char PROGRESSBAR_STYLE_BUILD[] = R"( 37constexpr char PROGRESSBAR_STYLE_BUILD[] = R"(
50QProgressBar { 38QProgressBar {
51 background-color: black; 39 background-color: black;
@@ -100,13 +88,11 @@ LoadingScreen::LoadingScreen(QWidget* parent)
100 88
101 stage_translations = { 89 stage_translations = {
102 {VideoCore::LoadCallbackStage::Prepare, tr("Loading...")}, 90 {VideoCore::LoadCallbackStage::Prepare, tr("Loading...")},
103 {VideoCore::LoadCallbackStage::Decompile, tr("Preparing Shaders %1 / %2")},
104 {VideoCore::LoadCallbackStage::Build, tr("Loading Shaders %1 / %2")}, 91 {VideoCore::LoadCallbackStage::Build, tr("Loading Shaders %1 / %2")},
105 {VideoCore::LoadCallbackStage::Complete, tr("Launching...")}, 92 {VideoCore::LoadCallbackStage::Complete, tr("Launching...")},
106 }; 93 };
107 progressbar_style = { 94 progressbar_style = {
108 {VideoCore::LoadCallbackStage::Prepare, PROGRESSBAR_STYLE_PREPARE}, 95 {VideoCore::LoadCallbackStage::Prepare, PROGRESSBAR_STYLE_PREPARE},
109 {VideoCore::LoadCallbackStage::Decompile, PROGRESSBAR_STYLE_DECOMPILE},
110 {VideoCore::LoadCallbackStage::Build, PROGRESSBAR_STYLE_BUILD}, 96 {VideoCore::LoadCallbackStage::Build, PROGRESSBAR_STYLE_BUILD},
111 {VideoCore::LoadCallbackStage::Complete, PROGRESSBAR_STYLE_COMPLETE}, 97 {VideoCore::LoadCallbackStage::Complete, PROGRESSBAR_STYLE_COMPLETE},
112 }; 98 };
@@ -192,8 +178,7 @@ void LoadingScreen::OnLoadProgress(VideoCore::LoadCallbackStage stage, std::size
192 } 178 }
193 179
194 // update labels and progress bar 180 // update labels and progress bar
195 if (stage == VideoCore::LoadCallbackStage::Decompile || 181 if (stage == VideoCore::LoadCallbackStage::Build) {
196 stage == VideoCore::LoadCallbackStage::Build) {
197 ui->stage->setText(stage_translations[stage].arg(value).arg(total)); 182 ui->stage->setText(stage_translations[stage].arg(value).arg(total));
198 } else { 183 } else {
199 ui->stage->setText(stage_translations[stage]); 184 ui->stage->setText(stage_translations[stage]);