summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/CMakeLists.txt 1
-rw-r--r-- src/video_core/engines/const_buffer_engine_interface.h 10
-rw-r--r-- src/video_core/engines/kepler_compute.cpp 1
-rw-r--r-- src/video_core/engines/kepler_compute.h 10
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 33
-rw-r--r-- src/video_core/engines/maxwell_3d.h 14
-rw-r--r-- src/video_core/engines/shader_type.h 21
-rw-r--r-- src/video_core/renderer_opengl/gl_device.cpp 87
-rw-r--r-- src/video_core/renderer_opengl/gl_device.h 26
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 226
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 27
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp 340
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.h 19
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 165
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.h 17
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_disk_cache.cpp 78
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_disk_cache.h 102
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_gen.cpp 44
-rw-r--r-- src/video_core/renderer_opengl/gl_state.cpp 18
-rw-r--r-- src/video_core/renderer_opengl/gl_state.h 8
-rw-r--r-- src/video_core/renderer_opengl/utils.cpp 32
-rw-r--r-- src/video_core/renderer_opengl/utils.h 18
-rw-r--r-- src/video_core/renderer_vulkan/maxwell_to_vk.cpp 14
-rw-r--r-- src/video_core/renderer_vulkan/maxwell_to_vk.h 2
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp 29
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_decompiler.h 2
-rw-r--r-- src/video_core/shader/const_buffer_locker.cpp 1
-rw-r--r-- src/video_core/shader/const_buffer_locker.h 3
-rw-r--r-- src/video_core/shader/decode/texture.cpp 111
-rw-r--r-- src/video_core/shader/node.h 14
-rw-r--r-- src/video_core/shader/shader_ir.h 13
31 files changed, 742 insertions, 744 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6f3f2aa9f..3b20c7d34 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -22,6 +22,7 @@ add_library(video_core STATIC
22 engines/maxwell_dma.h 22 engines/maxwell_dma.h
23 engines/shader_bytecode.h 23 engines/shader_bytecode.h
24 engines/shader_header.h 24 engines/shader_header.h
25 engines/shader_type.h
25 gpu.cpp 26 gpu.cpp
26 gpu.h 27 gpu.h
27 gpu_asynch.cpp 28 gpu_asynch.cpp
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
index ac27b6cbe..44b8b8d22 100644
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -8,19 +8,11 @@
8#include "common/bit_field.h" 8#include "common/bit_field.h"
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/engines/shader_bytecode.h" 10#include "video_core/engines/shader_bytecode.h"
11#include "video_core/engines/shader_type.h"
11#include "video_core/textures/texture.h" 12#include "video_core/textures/texture.h"
12 13
13namespace Tegra::Engines { 14namespace Tegra::Engines {
14 15
15enum class ShaderType : u32 {
16 Vertex = 0,
17 TesselationControl = 1,
18 TesselationEval = 2,
19 Geometry = 3,
20 Fragment = 4,
21 Compute = 5,
22};
23
24struct SamplerDescriptor { 16struct SamplerDescriptor {
25 union { 17 union {
26 BitField<0, 20, Tegra::Shader::TextureType> texture_type; 18 BitField<0, 20, Tegra::Shader::TextureType> texture_type;
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 3a39aeabe..110406f2f 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -8,6 +8,7 @@
8#include "core/core.h" 8#include "core/core.h"
9#include "video_core/engines/kepler_compute.h" 9#include "video_core/engines/kepler_compute.h"
10#include "video_core/engines/maxwell_3d.h" 10#include "video_core/engines/maxwell_3d.h"
11#include "video_core/engines/shader_type.h"
11#include "video_core/memory_manager.h" 12#include "video_core/memory_manager.h"
12#include "video_core/rasterizer_interface.h" 13#include "video_core/rasterizer_interface.h"
13#include "video_core/renderer_base.h" 14#include "video_core/renderer_base.h"
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 5259d92bd..4ef3e0613 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -12,6 +12,7 @@
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/engines/const_buffer_engine_interface.h" 13#include "video_core/engines/const_buffer_engine_interface.h"
14#include "video_core/engines/engine_upload.h" 14#include "video_core/engines/engine_upload.h"
15#include "video_core/engines/shader_type.h"
15#include "video_core/gpu.h" 16#include "video_core/gpu.h"
16#include "video_core/textures/texture.h" 17#include "video_core/textures/texture.h"
17 18
@@ -140,7 +141,7 @@ public:
140 141
141 INSERT_PADDING_WORDS(0x3); 142 INSERT_PADDING_WORDS(0x3);
142 143
143 BitField<0, 16, u32> shared_alloc; 144 BitField<0, 18, u32> shared_alloc;
144 145
145 BitField<16, 16, u32> block_dim_x; 146 BitField<16, 16, u32> block_dim_x;
146 union { 147 union {
@@ -178,7 +179,12 @@ public:
178 BitField<24, 5, u32> gpr_alloc; 179 BitField<24, 5, u32> gpr_alloc;
179 }; 180 };
180 181
181 INSERT_PADDING_WORDS(0x11); 182 union {
183 BitField<0, 20, u32> local_crs_alloc;
184 BitField<24, 5, u32> sass_version;
185 };
186
187 INSERT_PADDING_WORDS(0x10);
182 } launch_description{}; 188 } launch_description{};
183 189
184 struct { 190 struct {
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index a44c09003..15a7a9d6a 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -9,6 +9,7 @@
9#include "core/core_timing.h" 9#include "core/core_timing.h"
10#include "video_core/debug_utils/debug_utils.h" 10#include "video_core/debug_utils/debug_utils.h"
11#include "video_core/engines/maxwell_3d.h" 11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/engines/shader_type.h"
12#include "video_core/memory_manager.h" 13#include "video_core/memory_manager.h"
13#include "video_core/rasterizer_interface.h" 14#include "video_core/rasterizer_interface.h"
14#include "video_core/textures/texture.h" 15#include "video_core/textures/texture.h"
@@ -368,24 +369,24 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
368 StartCBData(method); 369 StartCBData(method);
369 break; 370 break;
370 } 371 }
371 case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { 372 case MAXWELL3D_REG_INDEX(cb_bind[0]): {
372 ProcessCBBind(Regs::ShaderStage::Vertex); 373 ProcessCBBind(0);
373 break; 374 break;
374 } 375 }
375 case MAXWELL3D_REG_INDEX(cb_bind[1].raw_config): { 376 case MAXWELL3D_REG_INDEX(cb_bind[1]): {
376 ProcessCBBind(Regs::ShaderStage::TesselationControl); 377 ProcessCBBind(1);
377 break; 378 break;
378 } 379 }
379 case MAXWELL3D_REG_INDEX(cb_bind[2].raw_config): { 380 case MAXWELL3D_REG_INDEX(cb_bind[2]): {
380 ProcessCBBind(Regs::ShaderStage::TesselationEval); 381 ProcessCBBind(2);
381 break; 382 break;
382 } 383 }
383 case MAXWELL3D_REG_INDEX(cb_bind[3].raw_config): { 384 case MAXWELL3D_REG_INDEX(cb_bind[3]): {
384 ProcessCBBind(Regs::ShaderStage::Geometry); 385 ProcessCBBind(3);
385 break; 386 break;
386 } 387 }
387 case MAXWELL3D_REG_INDEX(cb_bind[4].raw_config): { 388 case MAXWELL3D_REG_INDEX(cb_bind[4]): {
388 ProcessCBBind(Regs::ShaderStage::Fragment); 389 ProcessCBBind(4);
389 break; 390 break;
390 } 391 }
391 case MAXWELL3D_REG_INDEX(draw.vertex_end_gl): { 392 case MAXWELL3D_REG_INDEX(draw.vertex_end_gl): {
@@ -687,10 +688,10 @@ void Maxwell3D::DrawArrays() {
687 } 688 }
688} 689}
689 690
690void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) { 691void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
691 // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. 692 // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
692 auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; 693 auto& shader = state.shader_stages[stage_index];
693 auto& bind_data = regs.cb_bind[static_cast<std::size_t>(stage)]; 694 auto& bind_data = regs.cb_bind[stage_index];
694 695
695 ASSERT(bind_data.index < Regs::MaxConstBuffers); 696 ASSERT(bind_data.index < Regs::MaxConstBuffers);
696 auto& buffer = shader.const_buffers[bind_data.index]; 697 auto& buffer = shader.const_buffers[bind_data.index];
@@ -757,9 +758,9 @@ Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_ha
757 return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)}; 758 return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
758} 759}
759 760
760Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, 761Texture::FullTextureInfo Maxwell3D::GetStageTexture(ShaderType stage, std::size_t offset) const {
761 std::size_t offset) const { 762 const auto stage_index = static_cast<std::size_t>(stage);
762 const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; 763 const auto& shader = state.shader_stages[stage_index];
763 const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; 764 const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
764 ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); 765 ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
765 766
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 37390eb87..4cb7339b5 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -18,6 +18,7 @@
18#include "video_core/engines/const_buffer_engine_interface.h" 18#include "video_core/engines/const_buffer_engine_interface.h"
19#include "video_core/engines/const_buffer_info.h" 19#include "video_core/engines/const_buffer_info.h"
20#include "video_core/engines/engine_upload.h" 20#include "video_core/engines/engine_upload.h"
21#include "video_core/engines/shader_type.h"
21#include "video_core/gpu.h" 22#include "video_core/gpu.h"
22#include "video_core/macro_interpreter.h" 23#include "video_core/macro_interpreter.h"
23#include "video_core/textures/texture.h" 24#include "video_core/textures/texture.h"
@@ -62,7 +63,6 @@ public:
62 static constexpr std::size_t NumVertexArrays = 32; 63 static constexpr std::size_t NumVertexArrays = 32;
63 static constexpr std::size_t NumVertexAttributes = 32; 64 static constexpr std::size_t NumVertexAttributes = 32;
64 static constexpr std::size_t NumVaryings = 31; 65 static constexpr std::size_t NumVaryings = 31;
65 static constexpr std::size_t NumTextureSamplers = 32;
66 static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number 66 static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number
67 static constexpr std::size_t NumClipDistances = 8; 67 static constexpr std::size_t NumClipDistances = 8;
68 static constexpr std::size_t MaxShaderProgram = 6; 68 static constexpr std::size_t MaxShaderProgram = 6;
@@ -130,14 +130,6 @@ public:
130 Fragment = 5, 130 Fragment = 5,
131 }; 131 };
132 132
133 enum class ShaderStage : u32 {
134 Vertex = 0,
135 TesselationControl = 1,
136 TesselationEval = 2,
137 Geometry = 3,
138 Fragment = 4,
139 };
140
141 struct VertexAttribute { 133 struct VertexAttribute {
142 enum class Size : u32 { 134 enum class Size : u32 {
143 Invalid = 0x0, 135 Invalid = 0x0,
@@ -1254,7 +1246,7 @@ public:
1254 Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const; 1246 Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
1255 1247
1256 /// Returns the texture information for a specific texture in a specific shader stage. 1248 /// Returns the texture information for a specific texture in a specific shader stage.
1257 Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; 1249 Texture::FullTextureInfo GetStageTexture(ShaderType stage, std::size_t offset) const;
1258 1250
1259 u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; 1251 u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
1260 1252
@@ -1376,7 +1368,7 @@ private:
1376 void FinishCBData(); 1368 void FinishCBData();
1377 1369
1378 /// Handles a write to the CB_BIND register. 1370 /// Handles a write to the CB_BIND register.
1379 void ProcessCBBind(Regs::ShaderStage stage); 1371 void ProcessCBBind(std::size_t stage_index);
1380 1372
1381 /// Handles a write to the VERTEX_END_GL register, triggering a draw. 1373 /// Handles a write to the VERTEX_END_GL register, triggering a draw.
1382 void DrawArrays(); 1374 void DrawArrays();
diff --git a/src/video_core/engines/shader_type.h b/src/video_core/engines/shader_type.h
new file mode 100644
index 000000000..49ce5cde5
--- /dev/null
+++ b/src/video_core/engines/shader_type.h
@@ -0,0 +1,21 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Tegra::Engines {
10
11enum class ShaderType : u32 {
12 Vertex = 0,
13 TesselationControl = 1,
14 TesselationEval = 2,
15 Geometry = 3,
16 Fragment = 4,
17 Compute = 5,
18};
19static constexpr std::size_t MaxShaderTypes = 6;
20
21} // namespace Tegra::Engines
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index b30d5be74..a95bd4b2c 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -5,7 +5,9 @@
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <array>
7#include <cstddef> 7#include <cstddef>
8#include <optional>
8#include <vector> 9#include <vector>
10
9#include <glad/glad.h> 11#include <glad/glad.h>
10 12
11#include "common/logging/log.h" 13#include "common/logging/log.h"
@@ -17,6 +19,30 @@ namespace OpenGL {
17 19
18namespace { 20namespace {
19 21
22// One uniform block is reserved for emulation purposes
23constexpr u32 ReservedUniformBlocks = 1;
24
25constexpr u32 NumStages = 5;
26
27constexpr std::array LimitUBOs = {GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
28 GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS,
29 GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS};
30
31constexpr std::array LimitSSBOs = {
32 GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
33 GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
34 GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS};
35
36constexpr std::array LimitSamplers = {
37 GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
38 GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
39 GL_MAX_TEXTURE_IMAGE_UNITS};
40
41constexpr std::array LimitImages = {GL_MAX_VERTEX_IMAGE_UNIFORMS,
42 GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
43 GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS,
44 GL_MAX_GEOMETRY_IMAGE_UNIFORMS, GL_MAX_FRAGMENT_IMAGE_UNIFORMS};
45
20template <typename T> 46template <typename T>
21T GetInteger(GLenum pname) { 47T GetInteger(GLenum pname) {
22 GLint temporary; 48 GLint temporary;
@@ -48,13 +74,70 @@ bool HasExtension(const std::vector<std::string_view>& images, std::string_view
48 return std::find(images.begin(), images.end(), extension) != images.end(); 74 return std::find(images.begin(), images.end(), extension) != images.end();
49} 75}
50 76
77u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
78 ASSERT(num >= amount);
79 if (limit) {
80 amount = std::min(amount, GetInteger<u32>(*limit));
81 }
82 num -= amount;
83 return std::exchange(base, base + amount);
84}
85
86std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
87 std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
88
89 static std::array<std::size_t, 5> stage_swizzle = {0, 1, 2, 3, 4};
90 const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS);
91 const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS);
92 const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS);
93
94 u32 num_ubos = total_ubos - ReservedUniformBlocks;
95 u32 num_ssbos = total_ssbos;
96 u32 num_samplers = total_samplers;
97
98 u32 base_ubo = ReservedUniformBlocks;
99 u32 base_ssbo = 0;
100 u32 base_samplers = 0;
101
102 for (std::size_t i = 0; i < NumStages; ++i) {
103 const std::size_t stage = stage_swizzle[i];
104 bindings[stage] = {
105 Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]),
106 Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]),
107 Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])};
108 }
109
110 u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
111 u32 base_images = 0;
112
113 // Reserve more image bindings on fragment and vertex stages.
114 bindings[4].image =
115 Extract(base_images, num_images, num_images / NumStages + 2, LimitImages[4]);
116 bindings[0].image =
117 Extract(base_images, num_images, num_images / NumStages + 1, LimitImages[0]);
118
119 // Reserve the other image bindings.
120 const u32 total_extracted_images = num_images / (NumStages - 2);
121 for (std::size_t i = 2; i < NumStages; ++i) {
122 const std::size_t stage = stage_swizzle[i];
123 bindings[stage].image =
124 Extract(base_images, num_images, total_extracted_images, LimitImages[stage]);
125 }
126
127 // Compute doesn't care about any of this.
128 bindings[5] = {0, 0, 0, 0};
129
130 return bindings;
131}
132
51} // Anonymous namespace 133} // Anonymous namespace
52 134
53Device::Device() { 135Device::Device() : base_bindings{BuildBaseBindings()} {
54 const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); 136 const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
55 const std::vector extensions = GetExtensions(); 137 const std::vector extensions = GetExtensions();
56 138
57 const bool is_nvidia = vendor == "NVIDIA Corporation"; 139 const bool is_nvidia = vendor == "NVIDIA Corporation";
140 const bool is_intel = vendor == "Intel";
58 141
59 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); 142 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
60 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); 143 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
@@ -68,6 +151,7 @@ Device::Device() {
68 has_variable_aoffi = TestVariableAoffi(); 151 has_variable_aoffi = TestVariableAoffi();
69 has_component_indexing_bug = TestComponentIndexingBug(); 152 has_component_indexing_bug = TestComponentIndexingBug();
70 has_precise_bug = TestPreciseBug(); 153 has_precise_bug = TestPreciseBug();
154 has_broken_compute = is_intel;
71 has_fast_buffer_sub_data = is_nvidia; 155 has_fast_buffer_sub_data = is_nvidia;
72 156
73 LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); 157 LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
@@ -85,6 +169,7 @@ Device::Device(std::nullptr_t) {
85 has_image_load_formatted = true; 169 has_image_load_formatted = true;
86 has_variable_aoffi = true; 170 has_variable_aoffi = true;
87 has_component_indexing_bug = false; 171 has_component_indexing_bug = false;
172 has_broken_compute = false;
88 has_precise_bug = false; 173 has_precise_bug = false;
89} 174}
90 175
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 6c86fe207..5433815b9 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -6,14 +6,32 @@
6 6
7#include <cstddef> 7#include <cstddef>
8#include "common/common_types.h" 8#include "common/common_types.h"
9#include "video_core/engines/shader_type.h"
9 10
10namespace OpenGL { 11namespace OpenGL {
11 12
12class Device { 13static constexpr u32 EmulationUniformBlockBinding = 0;
14
15class Device final {
13public: 16public:
17 struct BaseBindings final {
18 u32 uniform_buffer{};
19 u32 shader_storage_buffer{};
20 u32 sampler{};
21 u32 image{};
22 };
23
14 explicit Device(); 24 explicit Device();
15 explicit Device(std::nullptr_t); 25 explicit Device(std::nullptr_t);
16 26
27 const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
28 return base_bindings[stage_index];
29 }
30
31 const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept {
32 return GetBaseBindings(static_cast<std::size_t>(shader_type));
33 }
34
17 std::size_t GetUniformBufferAlignment() const { 35 std::size_t GetUniformBufferAlignment() const {
18 return uniform_buffer_alignment; 36 return uniform_buffer_alignment;
19 } 37 }
@@ -58,6 +76,10 @@ public:
58 return has_precise_bug; 76 return has_precise_bug;
59 } 77 }
60 78
79 bool HasBrokenCompute() const {
80 return has_broken_compute;
81 }
82
61 bool HasFastBufferSubData() const { 83 bool HasFastBufferSubData() const {
62 return has_fast_buffer_sub_data; 84 return has_fast_buffer_sub_data;
63 } 85 }
@@ -67,6 +89,7 @@ private:
67 static bool TestComponentIndexingBug(); 89 static bool TestComponentIndexingBug();
68 static bool TestPreciseBug(); 90 static bool TestPreciseBug();
69 91
92 std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings;
70 std::size_t uniform_buffer_alignment{}; 93 std::size_t uniform_buffer_alignment{};
71 std::size_t shader_storage_alignment{}; 94 std::size_t shader_storage_alignment{};
72 u32 max_vertex_attributes{}; 95 u32 max_vertex_attributes{};
@@ -78,6 +101,7 @@ private:
78 bool has_variable_aoffi{}; 101 bool has_variable_aoffi{};
79 bool has_component_indexing_bug{}; 102 bool has_component_indexing_bug{};
80 bool has_precise_bug{}; 103 bool has_precise_bug{};
104 bool has_broken_compute{};
81 bool has_fast_buffer_sub_data{}; 105 bool has_fast_buffer_sub_data{};
82}; 106};
83 107
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 05f8e511b..f97ec06f0 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -22,6 +22,7 @@
22#include "core/settings.h" 22#include "core/settings.h"
23#include "video_core/engines/kepler_compute.h" 23#include "video_core/engines/kepler_compute.h"
24#include "video_core/engines/maxwell_3d.h" 24#include "video_core/engines/maxwell_3d.h"
25#include "video_core/engines/shader_type.h"
25#include "video_core/memory_manager.h" 26#include "video_core/memory_manager.h"
26#include "video_core/renderer_opengl/gl_rasterizer.h" 27#include "video_core/renderer_opengl/gl_rasterizer.h"
27#include "video_core/renderer_opengl/gl_shader_cache.h" 28#include "video_core/renderer_opengl/gl_shader_cache.h"
@@ -49,8 +50,25 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
49MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); 50MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
50MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100)); 51MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));
51 52
52static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, 53namespace {
53 const GLShader::ConstBufferEntry& entry) { 54
55template <typename Engine, typename Entry>
56Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
57 Tegra::Engines::ShaderType shader_type) {
58 if (entry.IsBindless()) {
59 const Tegra::Texture::TextureHandle tex_handle =
60 engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset());
61 return engine.GetTextureInfo(tex_handle);
62 }
63 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
64 return engine.GetStageTexture(shader_type, entry.GetOffset());
65 } else {
66 return engine.GetTexture(entry.GetOffset());
67 }
68}
69
70std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
71 const GLShader::ConstBufferEntry& entry) {
54 if (!entry.IsIndirect()) { 72 if (!entry.IsIndirect()) {
55 return entry.GetSize(); 73 return entry.GetSize();
56 } 74 }
@@ -64,6 +82,8 @@ static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buf
64 return buffer.size; 82 return buffer.size;
65} 83}
66 84
85} // Anonymous namespace
86
67RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, 87RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
68 ScreenInfo& info) 88 ScreenInfo& info)
69 : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, 89 : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
@@ -238,12 +258,11 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
238 MICROPROFILE_SCOPE(OpenGL_Shader); 258 MICROPROFILE_SCOPE(OpenGL_Shader);
239 auto& gpu = system.GPU().Maxwell3D(); 259 auto& gpu = system.GPU().Maxwell3D();
240 260
241 BaseBindings base_bindings;
242 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 261 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
243 262
244 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 263 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
245 const auto& shader_config = gpu.regs.shader_config[index]; 264 const auto& shader_config = gpu.regs.shader_config[index];
246 const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; 265 const auto program{static_cast<Maxwell::ShaderProgram>(index)};
247 266
248 // Skip stages that are not enabled 267 // Skip stages that are not enabled
249 if (!gpu.regs.IsShaderConfigEnabled(index)) { 268 if (!gpu.regs.IsShaderConfigEnabled(index)) {
@@ -257,24 +276,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
257 continue; 276 continue;
258 } 277 }
259 278
260 GLShader::MaxwellUniformData ubo{};
261 ubo.SetFromRegs(gpu);
262 const auto [buffer, offset] =
263 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
264
265 // Bind the emulation info buffer
266 bind_ubo_pushbuffer.Push(buffer, offset, static_cast<GLsizeiptr>(sizeof(ubo)));
267
268 Shader shader{shader_cache.GetStageProgram(program)}; 279 Shader shader{shader_cache.GetStageProgram(program)};
269 280
270 // Stage indices are 0 - 5 281 // Stage indices are 0 - 5
271 const auto stage = static_cast<Maxwell::ShaderStage>(index == 0 ? 0 : index - 1); 282 const std::size_t stage = index == 0 ? 0 : index - 1;
272 SetupDrawConstBuffers(stage, shader); 283 SetupDrawConstBuffers(stage, shader);
273 SetupDrawGlobalMemory(stage, shader); 284 SetupDrawGlobalMemory(stage, shader);
274 const auto texture_buffer_usage{SetupDrawTextures(stage, shader, base_bindings)}; 285 SetupDrawTextures(stage, shader);
286 SetupDrawImages(stage, shader);
275 287
276 const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; 288 const ProgramVariant variant(primitive_mode);
277 const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant); 289 const auto program_handle = shader->GetHandle(variant);
278 290
279 switch (program) { 291 switch (program) {
280 case Maxwell::ShaderProgram::VertexA: 292 case Maxwell::ShaderProgram::VertexA:
@@ -303,10 +315,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
303 // When VertexA is enabled, we have dual vertex shaders 315 // When VertexA is enabled, we have dual vertex shaders
304 if (program == Maxwell::ShaderProgram::VertexA) { 316 if (program == Maxwell::ShaderProgram::VertexA) {
305 // VertexB was combined with VertexA, so we skip the VertexB iteration 317 // VertexB was combined with VertexA, so we skip the VertexB iteration
306 index++; 318 ++index;
307 } 319 }
308
309 base_bindings = next_bindings;
310 } 320 }
311 321
312 SyncClipEnabled(clip_distances); 322 SyncClipEnabled(clip_distances);
@@ -591,8 +601,16 @@ void RasterizerOpenGL::DrawPrelude() {
591 index_buffer_offset = SetupIndexBuffer(); 601 index_buffer_offset = SetupIndexBuffer();
592 602
593 // Prepare packed bindings. 603 // Prepare packed bindings.
594 bind_ubo_pushbuffer.Setup(0); 604 bind_ubo_pushbuffer.Setup();
595 bind_ssbo_pushbuffer.Setup(0); 605 bind_ssbo_pushbuffer.Setup();
606
607 // Setup emulation uniform buffer.
608 GLShader::MaxwellUniformData ubo;
609 ubo.SetFromRegs(gpu);
610 const auto [buffer, offset] =
611 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
612 bind_ubo_pushbuffer.Push(EmulationUniformBlockBinding, buffer, offset,
613 static_cast<GLsizeiptr>(sizeof(ubo)));
596 614
597 // Setup shaders and their used resources. 615 // Setup shaders and their used resources.
598 texture_cache.GuardSamplers(true); 616 texture_cache.GuardSamplers(true);
@@ -725,19 +743,21 @@ bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) {
725} 743}
726 744
727void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { 745void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
728 if (!GLAD_GL_ARB_compute_variable_group_size) { 746 if (device.HasBrokenCompute()) {
729 LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the "
730 "lack of GL_ARB_compute_variable_group_size");
731 return; 747 return;
732 } 748 }
733 749
750 buffer_cache.Acquire();
751
734 auto kernel = shader_cache.GetComputeKernel(code_addr); 752 auto kernel = shader_cache.GetComputeKernel(code_addr);
735 ProgramVariant variant; 753 SetupComputeTextures(kernel);
736 variant.texture_buffer_usage = SetupComputeTextures(kernel);
737 SetupComputeImages(kernel); 754 SetupComputeImages(kernel);
738 755
739 const auto [program, next_bindings] = kernel->GetProgramHandle(variant); 756 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
740 state.draw.shader_program = program; 757 const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
758 launch_desc.block_dim_z, launch_desc.shared_alloc,
759 launch_desc.local_pos_alloc);
760 state.draw.shader_program = kernel->GetHandle(variant);
741 state.draw.program_pipeline = 0; 761 state.draw.program_pipeline = 0;
742 762
743 const std::size_t buffer_size = 763 const std::size_t buffer_size =
@@ -745,8 +765,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
745 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); 765 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
746 buffer_cache.Map(buffer_size); 766 buffer_cache.Map(buffer_size);
747 767
748 bind_ubo_pushbuffer.Setup(0); 768 bind_ubo_pushbuffer.Setup();
749 bind_ssbo_pushbuffer.Setup(0); 769 bind_ssbo_pushbuffer.Setup();
750 770
751 SetupComputeConstBuffers(kernel); 771 SetupComputeConstBuffers(kernel);
752 SetupComputeGlobalMemory(kernel); 772 SetupComputeGlobalMemory(kernel);
@@ -761,10 +781,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
761 state.ApplyShaderProgram(); 781 state.ApplyShaderProgram();
762 state.ApplyProgramPipeline(); 782 state.ApplyProgramPipeline();
763 783
764 const auto& launch_desc = system.GPU().KeplerCompute().launch_description; 784 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
765 glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y,
766 launch_desc.grid_dim_z, launch_desc.block_dim_x,
767 launch_desc.block_dim_y, launch_desc.block_dim_z);
768} 785}
769 786
770void RasterizerOpenGL::FlushAll() {} 787void RasterizerOpenGL::FlushAll() {}
@@ -833,7 +850,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
833 ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); 850 ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
834 851
835 if (params.pixel_format != pixel_format) { 852 if (params.pixel_format != pixel_format) {
836 LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different"); 853 LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different");
837 } 854 }
838 855
839 screen_info.display_texture = surface->GetTexture(); 856 screen_info.display_texture = surface->GetTexture();
@@ -842,20 +859,23 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
842 return true; 859 return true;
843} 860}
844 861
845void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, 862void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) {
846 const Shader& shader) {
847 MICROPROFILE_SCOPE(OpenGL_UBO); 863 MICROPROFILE_SCOPE(OpenGL_UBO);
848 const auto& stages = system.GPU().Maxwell3D().state.shader_stages; 864 const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
849 const auto& shader_stage = stages[static_cast<std::size_t>(stage)]; 865 const auto& shader_stage = stages[stage_index];
866
867 u32 binding = device.GetBaseBindings(stage_index).uniform_buffer;
850 for (const auto& entry : shader->GetShaderEntries().const_buffers) { 868 for (const auto& entry : shader->GetShaderEntries().const_buffers) {
851 const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; 869 const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
852 SetupConstBuffer(buffer, entry); 870 SetupConstBuffer(binding++, buffer, entry);
853 } 871 }
854} 872}
855 873
856void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { 874void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
857 MICROPROFILE_SCOPE(OpenGL_UBO); 875 MICROPROFILE_SCOPE(OpenGL_UBO);
858 const auto& launch_desc = system.GPU().KeplerCompute().launch_description; 876 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
877
878 u32 binding = 0;
859 for (const auto& entry : kernel->GetShaderEntries().const_buffers) { 879 for (const auto& entry : kernel->GetShaderEntries().const_buffers) {
860 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; 880 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
861 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); 881 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
@@ -863,15 +883,16 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
863 buffer.address = config.Address(); 883 buffer.address = config.Address();
864 buffer.size = config.size; 884 buffer.size = config.size;
865 buffer.enabled = mask[entry.GetIndex()]; 885 buffer.enabled = mask[entry.GetIndex()];
866 SetupConstBuffer(buffer, entry); 886 SetupConstBuffer(binding++, buffer, entry);
867 } 887 }
868} 888}
869 889
870void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, 890void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
871 const GLShader::ConstBufferEntry& entry) { 891 const GLShader::ConstBufferEntry& entry) {
872 if (!buffer.enabled) { 892 if (!buffer.enabled) {
873 // Set values to zero to unbind buffers 893 // Set values to zero to unbind buffers
874 bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); 894 bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
895 sizeof(float));
875 return; 896 return;
876 } 897 }
877 898
@@ -882,19 +903,20 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
882 const auto alignment = device.GetUniformBufferAlignment(); 903 const auto alignment = device.GetUniformBufferAlignment();
883 const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, 904 const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
884 device.HasFastBufferSubData()); 905 device.HasFastBufferSubData());
885 bind_ubo_pushbuffer.Push(cbuf, offset, size); 906 bind_ubo_pushbuffer.Push(binding, cbuf, offset, size);
886} 907}
887 908
888void RasterizerOpenGL::SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, 909void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
889 const Shader& shader) {
890 auto& gpu{system.GPU()}; 910 auto& gpu{system.GPU()};
891 auto& memory_manager{gpu.MemoryManager()}; 911 auto& memory_manager{gpu.MemoryManager()};
892 const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; 912 const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
913
914 u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer;
893 for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { 915 for (const auto& entry : shader->GetShaderEntries().global_memory_entries) {
894 const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; 916 const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
895 const auto gpu_addr{memory_manager.Read<u64>(addr)}; 917 const auto gpu_addr{memory_manager.Read<u64>(addr)};
896 const auto size{memory_manager.Read<u32>(addr + 8)}; 918 const auto size{memory_manager.Read<u32>(addr + 8)};
897 SetupGlobalMemory(entry, gpu_addr, size); 919 SetupGlobalMemory(binding++, entry, gpu_addr, size);
898 } 920 }
899} 921}
900 922
@@ -902,120 +924,82 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
902 auto& gpu{system.GPU()}; 924 auto& gpu{system.GPU()};
903 auto& memory_manager{gpu.MemoryManager()}; 925 auto& memory_manager{gpu.MemoryManager()};
904 const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; 926 const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
927
928 u32 binding = 0;
905 for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) { 929 for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) {
906 const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; 930 const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
907 const auto gpu_addr{memory_manager.Read<u64>(addr)}; 931 const auto gpu_addr{memory_manager.Read<u64>(addr)};
908 const auto size{memory_manager.Read<u32>(addr + 8)}; 932 const auto size{memory_manager.Read<u32>(addr + 8)};
909 SetupGlobalMemory(entry, gpu_addr, size); 933 SetupGlobalMemory(binding++, entry, gpu_addr, size);
910 } 934 }
911} 935}
912 936
913void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, 937void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry,
914 GPUVAddr gpu_addr, std::size_t size) { 938 GPUVAddr gpu_addr, std::size_t size) {
915 const auto alignment{device.GetShaderStorageBufferAlignment()}; 939 const auto alignment{device.GetShaderStorageBufferAlignment()};
916 const auto [ssbo, buffer_offset] = 940 const auto [ssbo, buffer_offset] =
917 buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten()); 941 buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten());
918 bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); 942 bind_ssbo_pushbuffer.Push(binding, ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
919} 943}
920 944
921TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stage, 945void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) {
922 const Shader& shader,
923 BaseBindings base_bindings) {
924 MICROPROFILE_SCOPE(OpenGL_Texture); 946 MICROPROFILE_SCOPE(OpenGL_Texture);
925 const auto& gpu = system.GPU(); 947 const auto& maxwell3d = system.GPU().Maxwell3D();
926 const auto& maxwell3d = gpu.Maxwell3D(); 948 u32 binding = device.GetBaseBindings(stage_index).sampler;
927 const auto& entries = shader->GetShaderEntries().samplers; 949 for (const auto& entry : shader->GetShaderEntries().samplers) {
928 950 const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
929 ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.textures), 951 const auto texture = GetTextureInfo(maxwell3d, entry, shader_type);
930 "Exceeded the number of active textures."); 952 SetupTexture(binding++, texture, entry);
931
932 TextureBufferUsage texture_buffer_usage{0};
933
934 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
935 const auto& entry = entries[bindpoint];
936 const auto texture = [&] {
937 if (!entry.IsBindless()) {
938 return maxwell3d.GetStageTexture(stage, entry.GetOffset());
939 }
940 const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage);
941 const Tegra::Texture::TextureHandle tex_handle =
942 maxwell3d.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset());
943 return maxwell3d.GetTextureInfo(tex_handle);
944 }();
945
946 if (SetupTexture(base_bindings.sampler + bindpoint, texture, entry)) {
947 texture_buffer_usage.set(bindpoint);
948 }
949 } 953 }
950
951 return texture_buffer_usage;
952} 954}
953 955
954TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { 956void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
955 MICROPROFILE_SCOPE(OpenGL_Texture); 957 MICROPROFILE_SCOPE(OpenGL_Texture);
956 const auto& compute = system.GPU().KeplerCompute(); 958 const auto& compute = system.GPU().KeplerCompute();
957 const auto& entries = kernel->GetShaderEntries().samplers; 959 u32 binding = 0;
958 960 for (const auto& entry : kernel->GetShaderEntries().samplers) {
959 ASSERT_MSG(entries.size() <= std::size(state.textures), 961 const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute);
960 "Exceeded the number of active textures."); 962 SetupTexture(binding++, texture, entry);
961
962 TextureBufferUsage texture_buffer_usage{0};
963
964 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
965 const auto& entry = entries[bindpoint];
966 const auto texture = [&] {
967 if (!entry.IsBindless()) {
968 return compute.GetTexture(entry.GetOffset());
969 }
970 const Tegra::Texture::TextureHandle tex_handle = compute.AccessConstBuffer32(
971 Tegra::Engines::ShaderType::Compute, entry.GetBuffer(), entry.GetOffset());
972 return compute.GetTextureInfo(tex_handle);
973 }();
974
975 if (SetupTexture(bindpoint, texture, entry)) {
976 texture_buffer_usage.set(bindpoint);
977 }
978 } 963 }
979
980 return texture_buffer_usage;
981} 964}
982 965
983bool RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, 966void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
984 const GLShader::SamplerEntry& entry) { 967 const GLShader::SamplerEntry& entry) {
985 state.samplers[binding] = sampler_cache.GetSampler(texture.tsc);
986
987 const auto view = texture_cache.GetTextureSurface(texture.tic, entry); 968 const auto view = texture_cache.GetTextureSurface(texture.tic, entry);
988 if (!view) { 969 if (!view) {
989 // Can occur when texture addr is null or its memory is unmapped/invalid 970 // Can occur when texture addr is null or its memory is unmapped/invalid
971 state.samplers[binding] = 0;
990 state.textures[binding] = 0; 972 state.textures[binding] = 0;
991 return false; 973 return;
992 } 974 }
993 state.textures[binding] = view->GetTexture(); 975 state.textures[binding] = view->GetTexture();
994 976
995 if (view->GetSurfaceParams().IsBuffer()) { 977 if (view->GetSurfaceParams().IsBuffer()) {
996 return true; 978 return;
997 } 979 }
980 state.samplers[binding] = sampler_cache.GetSampler(texture.tsc);
998 981
999 // Apply swizzle to textures that are not buffers. 982 // Apply swizzle to textures that are not buffers.
1000 view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, 983 view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
1001 texture.tic.w_source); 984 texture.tic.w_source);
1002 return false; 985}
986
987void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
988 const auto& maxwell3d = system.GPU().Maxwell3D();
989 u32 binding = device.GetBaseBindings(stage_index).image;
990 for (const auto& entry : shader->GetShaderEntries().images) {
991 const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
992 const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
993 SetupImage(binding++, tic, entry);
994 }
1003} 995}
1004 996
1005void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { 997void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
1006 const auto& compute = system.GPU().KeplerCompute(); 998 const auto& compute = system.GPU().KeplerCompute();
1007 const auto& entries = shader->GetShaderEntries().images; 999 u32 binding = 0;
1008 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { 1000 for (const auto& entry : shader->GetShaderEntries().images) {
1009 const auto& entry = entries[bindpoint]; 1001 const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic;
1010 const auto tic = [&] { 1002 SetupImage(binding++, tic, entry);
1011 if (!entry.IsBindless()) {
1012 return compute.GetTexture(entry.GetOffset()).tic;
1013 }
1014 const Tegra::Texture::TextureHandle tex_handle = compute.AccessConstBuffer32(
1015 Tegra::Engines::ShaderType::Compute, entry.GetBuffer(), entry.GetOffset());
1016 return compute.GetTextureInfo(tex_handle).tic;
1017 }();
1018 SetupImage(bindpoint, tic, entry);
1019 } 1003 }
1020} 1004}
1021 1005
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index bd6fe5c3a..0e47d71df 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -83,42 +83,41 @@ private:
83 bool using_depth_fb, bool using_stencil_fb); 83 bool using_depth_fb, bool using_stencil_fb);
84 84
85 /// Configures the current constbuffers to use for the draw command. 85 /// Configures the current constbuffers to use for the draw command.
86 void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, 86 void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader);
87 const Shader& shader);
88 87
89 /// Configures the current constbuffers to use for the kernel invocation. 88 /// Configures the current constbuffers to use for the kernel invocation.
90 void SetupComputeConstBuffers(const Shader& kernel); 89 void SetupComputeConstBuffers(const Shader& kernel);
91 90
92 /// Configures a constant buffer. 91 /// Configures a constant buffer.
93 void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, 92 void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
94 const GLShader::ConstBufferEntry& entry); 93 const GLShader::ConstBufferEntry& entry);
95 94
96 /// Configures the current global memory entries to use for the draw command. 95 /// Configures the current global memory entries to use for the draw command.
97 void SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, 96 void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);
98 const Shader& shader);
99 97
100 /// Configures the current global memory entries to use for the kernel invocation. 98 /// Configures the current global memory entries to use for the kernel invocation.
101 void SetupComputeGlobalMemory(const Shader& kernel); 99 void SetupComputeGlobalMemory(const Shader& kernel);
102 100
103 /// Configures a constant buffer. 101 /// Configures a constant buffer.
104 void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, 102 void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
105 std::size_t size); 103 std::size_t size);
106 104
107 /// Syncs all the state, shaders, render targets and textures setting before a draw call. 105 /// Syncs all the state, shaders, render targets and textures setting before a draw call.
108 void DrawPrelude(); 106 void DrawPrelude();
109 107
110 /// Configures the current textures to use for the draw command. Returns shaders texture buffer 108 /// Configures the current textures to use for the draw command.
111 /// usage. 109 void SetupDrawTextures(std::size_t stage_index, const Shader& shader);
112 TextureBufferUsage SetupDrawTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
113 const Shader& shader, BaseBindings base_bindings);
114 110
115 /// Configures the textures used in a compute shader. Returns texture buffer usage. 111 /// Configures the textures used in a compute shader.
116 TextureBufferUsage SetupComputeTextures(const Shader& kernel); 112 void SetupComputeTextures(const Shader& kernel);
117 113
118 /// Configures a texture. Returns true when the texture is a texture buffer. 114 /// Configures a texture.
119 bool SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, 115 void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
120 const GLShader::SamplerEntry& entry); 116 const GLShader::SamplerEntry& entry);
121 117
118 /// Configures images in a graphics shader.
119 void SetupDrawImages(std::size_t stage_index, const Shader& shader);
120
122 /// Configures images in a compute shader. 121 /// Configures images in a compute shader.
123 void SetupComputeImages(const Shader& shader); 122 void SetupComputeImages(const Shader& shader);
124 123
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 04a239a39..370bdf052 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -8,12 +8,15 @@
8#include <thread> 8#include <thread>
9#include <unordered_set> 9#include <unordered_set>
10#include <boost/functional/hash.hpp> 10#include <boost/functional/hash.hpp>
11#include "common/alignment.h"
11#include "common/assert.h" 12#include "common/assert.h"
13#include "common/logging/log.h"
12#include "common/scope_exit.h" 14#include "common/scope_exit.h"
13#include "core/core.h" 15#include "core/core.h"
14#include "core/frontend/emu_window.h" 16#include "core/frontend/emu_window.h"
15#include "video_core/engines/kepler_compute.h" 17#include "video_core/engines/kepler_compute.h"
16#include "video_core/engines/maxwell_3d.h" 18#include "video_core/engines/maxwell_3d.h"
19#include "video_core/engines/shader_type.h"
17#include "video_core/memory_manager.h" 20#include "video_core/memory_manager.h"
18#include "video_core/renderer_opengl/gl_rasterizer.h" 21#include "video_core/renderer_opengl/gl_rasterizer.h"
19#include "video_core/renderer_opengl/gl_shader_cache.h" 22#include "video_core/renderer_opengl/gl_shader_cache.h"
@@ -82,28 +85,26 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
82/// Gets the shader program code from memory for the specified address 85/// Gets the shader program code from memory for the specified address
83ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, 86ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
84 const u8* host_ptr) { 87 const u8* host_ptr) {
85 ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); 88 ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
86 ASSERT_OR_EXECUTE(host_ptr != nullptr, { 89 ASSERT_OR_EXECUTE(host_ptr != nullptr, {
87 std::fill(program_code.begin(), program_code.end(), 0); 90 std::fill(code.begin(), code.end(), 0);
88 return program_code; 91 return code;
89 }); 92 });
90 memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), 93 memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64));
91 program_code.size() * sizeof(u64)); 94 code.resize(CalculateProgramSize(code));
92 program_code.resize(CalculateProgramSize(program_code)); 95 return code;
93 return program_code;
94} 96}
95 97
96/// Gets the shader type from a Maxwell program type 98/// Gets the shader type from a Maxwell program type
97constexpr GLenum GetShaderType(ProgramType program_type) { 99constexpr GLenum GetGLShaderType(ShaderType shader_type) {
98 switch (program_type) { 100 switch (shader_type) {
99 case ProgramType::VertexA: 101 case ShaderType::Vertex:
100 case ProgramType::VertexB:
101 return GL_VERTEX_SHADER; 102 return GL_VERTEX_SHADER;
102 case ProgramType::Geometry: 103 case ShaderType::Geometry:
103 return GL_GEOMETRY_SHADER; 104 return GL_GEOMETRY_SHADER;
104 case ProgramType::Fragment: 105 case ShaderType::Fragment:
105 return GL_FRAGMENT_SHADER; 106 return GL_FRAGMENT_SHADER;
106 case ProgramType::Compute: 107 case ShaderType::Compute:
107 return GL_COMPUTE_SHADER; 108 return GL_COMPUTE_SHADER;
108 default: 109 default:
109 return GL_NONE; 110 return GL_NONE;
@@ -133,30 +134,11 @@ constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLen
133 } 134 }
134} 135}
135 136
136ProgramType GetProgramType(Maxwell::ShaderProgram program) {
137 switch (program) {
138 case Maxwell::ShaderProgram::VertexA:
139 return ProgramType::VertexA;
140 case Maxwell::ShaderProgram::VertexB:
141 return ProgramType::VertexB;
142 case Maxwell::ShaderProgram::TesselationControl:
143 return ProgramType::TessellationControl;
144 case Maxwell::ShaderProgram::TesselationEval:
145 return ProgramType::TessellationEval;
146 case Maxwell::ShaderProgram::Geometry:
147 return ProgramType::Geometry;
148 case Maxwell::ShaderProgram::Fragment:
149 return ProgramType::Fragment;
150 }
151 UNREACHABLE();
152 return {};
153}
154
155/// Hashes one (or two) program streams 137/// Hashes one (or two) program streams
156u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, 138u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code,
157 const ProgramCode& code_b) { 139 const ProgramCode& code_b) {
158 u64 unique_identifier = boost::hash_value(code); 140 u64 unique_identifier = boost::hash_value(code);
159 if (program_type == ProgramType::VertexA) { 141 if (is_a) {
160 // VertexA programs include two programs 142 // VertexA programs include two programs
161 boost::hash_combine(unique_identifier, boost::hash_value(code_b)); 143 boost::hash_combine(unique_identifier, boost::hash_value(code_b));
162 } 144 }
@@ -164,79 +146,74 @@ u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code,
164} 146}
165 147
166/// Creates an unspecialized program from code streams 148/// Creates an unspecialized program from code streams
167std::string GenerateGLSL(const Device& device, ProgramType program_type, const ShaderIR& ir, 149std::string GenerateGLSL(const Device& device, ShaderType shader_type, const ShaderIR& ir,
168 const std::optional<ShaderIR>& ir_b) { 150 const std::optional<ShaderIR>& ir_b) {
169 switch (program_type) { 151 switch (shader_type) {
170 case ProgramType::VertexA: 152 case ShaderType::Vertex:
171 case ProgramType::VertexB:
172 return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr); 153 return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr);
173 case ProgramType::Geometry: 154 case ShaderType::Geometry:
174 return GLShader::GenerateGeometryShader(device, ir); 155 return GLShader::GenerateGeometryShader(device, ir);
175 case ProgramType::Fragment: 156 case ShaderType::Fragment:
176 return GLShader::GenerateFragmentShader(device, ir); 157 return GLShader::GenerateFragmentShader(device, ir);
177 case ProgramType::Compute: 158 case ShaderType::Compute:
178 return GLShader::GenerateComputeShader(device, ir); 159 return GLShader::GenerateComputeShader(device, ir);
179 default: 160 default:
180 UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type)); 161 UNIMPLEMENTED_MSG("Unimplemented shader_type={}", static_cast<u32>(shader_type));
181 return {}; 162 return {};
182 } 163 }
183} 164}
184 165
185constexpr const char* GetProgramTypeName(ProgramType program_type) { 166constexpr const char* GetShaderTypeName(ShaderType shader_type) {
186 switch (program_type) { 167 switch (shader_type) {
187 case ProgramType::VertexA: 168 case ShaderType::Vertex:
188 case ProgramType::VertexB:
189 return "VS"; 169 return "VS";
190 case ProgramType::TessellationControl: 170 case ShaderType::TesselationControl:
191 return "TCS"; 171 return "HS";
192 case ProgramType::TessellationEval: 172 case ShaderType::TesselationEval:
193 return "TES"; 173 return "DS";
194 case ProgramType::Geometry: 174 case ShaderType::Geometry:
195 return "GS"; 175 return "GS";
196 case ProgramType::Fragment: 176 case ShaderType::Fragment:
197 return "FS"; 177 return "FS";
198 case ProgramType::Compute: 178 case ShaderType::Compute:
199 return "CS"; 179 return "CS";
200 } 180 }
201 return "UNK"; 181 return "UNK";
202} 182}
203 183
204Tegra::Engines::ShaderType GetEnginesShaderType(ProgramType program_type) { 184constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
205 switch (program_type) { 185 switch (program_type) {
206 case ProgramType::VertexA: 186 case Maxwell::ShaderProgram::VertexA:
207 case ProgramType::VertexB: 187 case Maxwell::ShaderProgram::VertexB:
208 return Tegra::Engines::ShaderType::Vertex; 188 return ShaderType::Vertex;
209 case ProgramType::TessellationControl: 189 case Maxwell::ShaderProgram::TesselationControl:
210 return Tegra::Engines::ShaderType::TesselationControl; 190 return ShaderType::TesselationControl;
211 case ProgramType::TessellationEval: 191 case Maxwell::ShaderProgram::TesselationEval:
212 return Tegra::Engines::ShaderType::TesselationEval; 192 return ShaderType::TesselationEval;
213 case ProgramType::Geometry: 193 case Maxwell::ShaderProgram::Geometry:
214 return Tegra::Engines::ShaderType::Geometry; 194 return ShaderType::Geometry;
215 case ProgramType::Fragment: 195 case Maxwell::ShaderProgram::Fragment:
216 return Tegra::Engines::ShaderType::Fragment; 196 return ShaderType::Fragment;
217 case ProgramType::Compute: 197 }
218 return Tegra::Engines::ShaderType::Compute;
219 }
220 UNREACHABLE();
221 return {}; 198 return {};
222} 199}
223 200
224std::string GetShaderId(u64 unique_identifier, ProgramType program_type) { 201std::string GetShaderId(u64 unique_identifier, ShaderType shader_type) {
225 return fmt::format("{}{:016X}", GetProgramTypeName(program_type), unique_identifier); 202 return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
226} 203}
227 204
228Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface( 205Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface(Core::System& system,
229 Core::System& system, ProgramType program_type) { 206 ShaderType shader_type) {
230 if (program_type == ProgramType::Compute) { 207 if (shader_type == ShaderType::Compute) {
231 return system.GPU().KeplerCompute(); 208 return system.GPU().KeplerCompute();
232 } else { 209 } else {
233 return system.GPU().Maxwell3D(); 210 return system.GPU().Maxwell3D();
234 } 211 }
235} 212}
236 213
237std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ProgramType program_type) { 214std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType shader_type) {
238 return std::make_unique<ConstBufferLocker>(GetEnginesShaderType(program_type), 215 return std::make_unique<ConstBufferLocker>(shader_type,
239 GetConstBufferEngineInterface(system, program_type)); 216 GetConstBufferEngineInterface(system, shader_type));
240} 217}
241 218
242void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { 219void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
@@ -253,33 +230,26 @@ void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
253 } 230 }
254} 231}
255 232
256CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramType program_type, 233CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderType shader_type,
257 const ProgramCode& program_code, const ProgramCode& program_code_b, 234 const ProgramCode& code, const ProgramCode& code_b,
258 const ProgramVariant& variant, ConstBufferLocker& locker, 235 ConstBufferLocker& locker, const ProgramVariant& variant,
259 bool hint_retrievable = false) { 236 bool hint_retrievable = false) {
260 LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, program_type)); 237 LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, shader_type));
261 238
262 const bool is_compute = program_type == ProgramType::Compute; 239 const bool is_compute = shader_type == ShaderType::Compute;
263 const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; 240 const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
264 const ShaderIR ir(program_code, main_offset, COMPILER_SETTINGS, locker); 241 const ShaderIR ir(code, main_offset, COMPILER_SETTINGS, locker);
265 std::optional<ShaderIR> ir_b; 242 std::optional<ShaderIR> ir_b;
266 if (!program_code_b.empty()) { 243 if (!code_b.empty()) {
267 ir_b.emplace(program_code_b, main_offset, COMPILER_SETTINGS, locker); 244 ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker);
268 } 245 }
269 const auto entries = GLShader::GetEntries(ir); 246 const auto entries = GLShader::GetEntries(ir);
270 247
271 auto base_bindings{variant.base_bindings};
272 const auto primitive_mode{variant.primitive_mode};
273 const auto texture_buffer_usage{variant.texture_buffer_usage};
274
275 std::string source = fmt::format(R"(// {} 248 std::string source = fmt::format(R"(// {}
276#version 430 core 249#version 430 core
277#extension GL_ARB_separate_shader_objects : enable 250#extension GL_ARB_separate_shader_objects : enable
278)", 251)",
279 GetShaderId(unique_identifier, program_type)); 252 GetShaderId(unique_identifier, shader_type));
280 if (is_compute) {
281 source += "#extension GL_ARB_compute_variable_group_size : require\n";
282 }
283 if (device.HasShaderBallot()) { 253 if (device.HasShaderBallot()) {
284 source += "#extension GL_ARB_shader_ballot : require\n"; 254 source += "#extension GL_ARB_shader_ballot : require\n";
285 } 255 }
@@ -296,54 +266,35 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy
296 } 266 }
297 source += '\n'; 267 source += '\n';
298 268
299 if (!is_compute) { 269 if (shader_type == ShaderType::Geometry) {
300 source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); 270 const auto [glsl_topology, debug_name, max_vertices] =
301 } 271 GetPrimitiveDescription(variant.primitive_mode);
302 272
303 for (const auto& cbuf : entries.const_buffers) { 273 source += fmt::format("layout ({}) in;\n\n", glsl_topology);
304 source += 274 source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices);
305 fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++);
306 } 275 }
307 for (const auto& gmem : entries.global_memory_entries) { 276 if (shader_type == ShaderType::Compute) {
308 source += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(),
309 gmem.GetCbufOffset(), base_bindings.gmem++);
310 }
311 for (const auto& sampler : entries.samplers) {
312 source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
313 base_bindings.sampler++);
314 }
315 for (const auto& image : entries.images) {
316 source += 277 source +=
317 fmt::format("#define IMAGE_BINDING_{} {}\n", image.GetIndex(), base_bindings.image++); 278 fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n",
318 } 279 variant.block_x, variant.block_y, variant.block_z);
319 280
320 // Transform 1D textures to texture samplers by declaring its preprocessor macros. 281 if (variant.shared_memory_size > 0) {
321 for (std::size_t i = 0; i < texture_buffer_usage.size(); ++i) { 282 // TODO(Rodrigo): We should divide by four here, but having a larger shared memory pool
322 if (!texture_buffer_usage.test(i)) { 283 // avoids out of bound stores. Find out why shared memory size is being invalid.
323 continue; 284 source += fmt::format("shared uint smem[{}];", variant.shared_memory_size);
324 } 285 }
325 source += fmt::format("#define SAMPLER_{}_IS_BUFFER\n", i);
326 }
327 if (texture_buffer_usage.any()) {
328 source += '\n';
329 }
330 286
331 if (program_type == ProgramType::Geometry) { 287 if (variant.local_memory_size > 0) {
332 const auto [glsl_topology, debug_name, max_vertices] = 288 source += fmt::format("#define LOCAL_MEMORY_SIZE {}",
333 GetPrimitiveDescription(primitive_mode); 289 Common::AlignUp(variant.local_memory_size, 4) / 4);
334 290 }
335 source += "layout (" + std::string(glsl_topology) + ") in;\n\n";
336 source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
337 }
338 if (program_type == ProgramType::Compute) {
339 source += "layout (local_size_variable) in;\n";
340 } 291 }
341 292
342 source += '\n'; 293 source += '\n';
343 source += GenerateGLSL(device, program_type, ir, ir_b); 294 source += GenerateGLSL(device, shader_type, ir, ir_b);
344 295
345 OGLShader shader; 296 OGLShader shader;
346 shader.Create(source.c_str(), GetShaderType(program_type)); 297 shader.Create(source.c_str(), GetGLShaderType(shader_type));
347 298
348 auto program = std::make_shared<OGLProgram>(); 299 auto program = std::make_shared<OGLProgram>();
349 program->Create(true, hint_retrievable, shader.handle); 300 program->Create(true, hint_retrievable, shader.handle);
@@ -366,18 +317,16 @@ std::unordered_set<GLenum> GetSupportedFormats() {
366 317
367} // Anonymous namespace 318} // Anonymous namespace
368 319
369CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, 320CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type,
370 GLShader::ShaderEntries entries, ProgramCode program_code, 321 GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b)
371 ProgramCode program_code_b) 322 : RasterizerCacheObject{params.host_ptr}, system{params.system}, disk_cache{params.disk_cache},
372 : RasterizerCacheObject{params.host_ptr}, system{params.system}, 323 device{params.device}, cpu_addr{params.cpu_addr}, unique_identifier{params.unique_identifier},
373 disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, 324 shader_type{shader_type}, entries{entries}, code{std::move(code)}, code_b{std::move(code_b)} {
374 unique_identifier{params.unique_identifier}, program_type{program_type}, entries{entries},
375 program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {
376 if (!params.precompiled_variants) { 325 if (!params.precompiled_variants) {
377 return; 326 return;
378 } 327 }
379 for (const auto& pair : *params.precompiled_variants) { 328 for (const auto& pair : *params.precompiled_variants) {
380 auto locker = MakeLocker(system, program_type); 329 auto locker = MakeLocker(system, shader_type);
381 const auto& usage = pair->first; 330 const auto& usage = pair->first;
382 FillLocker(*locker, usage); 331 FillLocker(*locker, usage);
383 332
@@ -398,94 +347,83 @@ CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_t
398} 347}
399 348
400Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, 349Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
401 Maxwell::ShaderProgram program_type, 350 Maxwell::ShaderProgram program_type, ProgramCode code,
402 ProgramCode program_code, ProgramCode program_code_b) { 351 ProgramCode code_b) {
403 params.disk_cache.SaveRaw(ShaderDiskCacheRaw( 352 const auto shader_type = GetShaderType(program_type);
404 params.unique_identifier, GetProgramType(program_type), program_code, program_code_b)); 353 params.disk_cache.SaveRaw(
405 354 ShaderDiskCacheRaw(params.unique_identifier, shader_type, code, code_b));
406 ConstBufferLocker locker(GetEnginesShaderType(GetProgramType(program_type)), 355
407 params.system.GPU().Maxwell3D()); 356 ConstBufferLocker locker(shader_type, params.system.GPU().Maxwell3D());
408 const ShaderIR ir(program_code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker); 357 const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker);
409 // TODO(Rodrigo): Handle VertexA shaders 358 // TODO(Rodrigo): Handle VertexA shaders
410 // std::optional<ShaderIR> ir_b; 359 // std::optional<ShaderIR> ir_b;
411 // if (!program_code_b.empty()) { 360 // if (!code_b.empty()) {
412 // ir_b.emplace(program_code_b, STAGE_MAIN_OFFSET); 361 // ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
413 // } 362 // }
414 return std::shared_ptr<CachedShader>( 363 return std::shared_ptr<CachedShader>(new CachedShader(
415 new CachedShader(params, GetProgramType(program_type), GLShader::GetEntries(ir), 364 params, shader_type, GLShader::GetEntries(ir), std::move(code), std::move(code_b)));
416 std::move(program_code), std::move(program_code_b)));
417} 365}
418 366
419Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { 367Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
420 params.disk_cache.SaveRaw( 368 params.disk_cache.SaveRaw(
421 ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, code)); 369 ShaderDiskCacheRaw(params.unique_identifier, ShaderType::Compute, code));
422 370
423 ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute, 371 ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute,
424 params.system.GPU().KeplerCompute()); 372 params.system.GPU().KeplerCompute());
425 const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker); 373 const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker);
426 return std::shared_ptr<CachedShader>(new CachedShader( 374 return std::shared_ptr<CachedShader>(new CachedShader(
427 params, ProgramType::Compute, GLShader::GetEntries(ir), std::move(code), {})); 375 params, ShaderType::Compute, GLShader::GetEntries(ir), std::move(code), {}));
428} 376}
429 377
430Shader CachedShader::CreateFromCache(const ShaderParameters& params, 378Shader CachedShader::CreateFromCache(const ShaderParameters& params,
431 const UnspecializedShader& unspecialized) { 379 const UnspecializedShader& unspecialized) {
432 return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.program_type, 380 return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.type,
433 unspecialized.entries, unspecialized.code, 381 unspecialized.entries, unspecialized.code,
434 unspecialized.code_b)); 382 unspecialized.code_b));
435} 383}
436 384
437std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { 385GLuint CachedShader::GetHandle(const ProgramVariant& variant) {
438 UpdateVariant(); 386 EnsureValidLockerVariant();
439 387
440 const auto [entry, is_cache_miss] = curr_variant->programs.try_emplace(variant); 388 const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant);
441 auto& program = entry->second; 389 auto& program = entry->second;
442 if (is_cache_miss) { 390 if (!is_cache_miss) {
443 program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b, 391 return program->handle;
444 variant, *curr_variant->locker);
445 disk_cache.SaveUsage(GetUsage(variant, *curr_variant->locker));
446
447 LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
448 } 392 }
449 393
450 auto base_bindings = variant.base_bindings; 394 program = BuildShader(device, unique_identifier, shader_type, code, code_b,
451 base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()); 395 *curr_locker_variant->locker, variant);
452 if (program_type != ProgramType::Compute) { 396 disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker));
453 base_bindings.cbuf += STAGE_RESERVED_UBOS;
454 }
455 base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
456 base_bindings.sampler += static_cast<u32>(entries.samplers.size());
457 397
458 return {program->handle, base_bindings}; 398 LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
399 return program->handle;
459} 400}
460 401
461void CachedShader::UpdateVariant() { 402bool CachedShader::EnsureValidLockerVariant() {
462 if (curr_variant && !curr_variant->locker->IsConsistent()) { 403 const auto previous_variant = curr_locker_variant;
463 curr_variant = nullptr; 404 if (curr_locker_variant && !curr_locker_variant->locker->IsConsistent()) {
405 curr_locker_variant = nullptr;
464 } 406 }
465 if (!curr_variant) { 407 if (!curr_locker_variant) {
466 for (auto& variant : locker_variants) { 408 for (auto& variant : locker_variants) {
467 if (variant->locker->IsConsistent()) { 409 if (variant->locker->IsConsistent()) {
468 curr_variant = variant.get(); 410 curr_locker_variant = variant.get();
469 } 411 }
470 } 412 }
471 } 413 }
472 if (!curr_variant) { 414 if (!curr_locker_variant) {
473 auto& new_variant = locker_variants.emplace_back(); 415 auto& new_variant = locker_variants.emplace_back();
474 new_variant = std::make_unique<LockerVariant>(); 416 new_variant = std::make_unique<LockerVariant>();
475 new_variant->locker = MakeLocker(system, program_type); 417 new_variant->locker = MakeLocker(system, shader_type);
476 curr_variant = new_variant.get(); 418 curr_locker_variant = new_variant.get();
477 } 419 }
420 return previous_variant == curr_locker_variant;
478} 421}
479 422
480ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, 423ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant,
481 const ConstBufferLocker& locker) const { 424 const ConstBufferLocker& locker) const {
482 ShaderDiskCacheUsage usage; 425 return ShaderDiskCacheUsage{unique_identifier, variant, locker.GetKeys(),
483 usage.unique_identifier = unique_identifier; 426 locker.GetBoundSamplers(), locker.GetBindlessSamplers()};
484 usage.variant = variant;
485 usage.keys = locker.GetKeys();
486 usage.bound_samplers = locker.GetBoundSamplers();
487 usage.bindless_samplers = locker.GetBindlessSamplers();
488 return usage;
489} 427}
490 428
491ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, 429ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -544,11 +482,12 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
544 } 482 }
545 } 483 }
546 if (!shader) { 484 if (!shader) {
547 auto locker{MakeLocker(system, unspecialized.program_type)}; 485 auto locker{MakeLocker(system, unspecialized.type)};
548 FillLocker(*locker, usage); 486 FillLocker(*locker, usage);
549 shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type, 487
550 unspecialized.code, unspecialized.code_b, usage.variant, 488 shader = BuildShader(device, usage.unique_identifier, unspecialized.type,
551 *locker, true); 489 unspecialized.code, unspecialized.code_b, *locker,
490 usage.variant, true);
552 } 491 }
553 492
554 std::scoped_lock lock{mutex}; 493 std::scoped_lock lock{mutex};
@@ -651,7 +590,7 @@ bool ShaderCacheOpenGL::GenerateUnspecializedShaders(
651 const auto& raw{raws[i]}; 590 const auto& raw{raws[i]};
652 const u64 unique_identifier{raw.GetUniqueIdentifier()}; 591 const u64 unique_identifier{raw.GetUniqueIdentifier()};
653 const u64 calculated_hash{ 592 const u64 calculated_hash{
654 GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())}; 593 GetUniqueIdentifier(raw.GetType(), raw.HasProgramA(), raw.GetCode(), raw.GetCodeB())};
655 if (unique_identifier != calculated_hash) { 594 if (unique_identifier != calculated_hash) {
656 LOG_ERROR(Render_OpenGL, 595 LOG_ERROR(Render_OpenGL,
657 "Invalid hash in entry={:016x} (obtained hash={:016x}) - " 596 "Invalid hash in entry={:016x} (obtained hash={:016x}) - "
@@ -662,9 +601,9 @@ bool ShaderCacheOpenGL::GenerateUnspecializedShaders(
662 } 601 }
663 602
664 const u32 main_offset = 603 const u32 main_offset =
665 raw.GetProgramType() == ProgramType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; 604 raw.GetType() == ShaderType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
666 ConstBufferLocker locker(GetEnginesShaderType(raw.GetProgramType())); 605 ConstBufferLocker locker(raw.GetType());
667 const ShaderIR ir(raw.GetProgramCode(), main_offset, COMPILER_SETTINGS, locker); 606 const ShaderIR ir(raw.GetCode(), main_offset, COMPILER_SETTINGS, locker);
668 // TODO(Rodrigo): Handle VertexA shaders 607 // TODO(Rodrigo): Handle VertexA shaders
669 // std::optional<ShaderIR> ir_b; 608 // std::optional<ShaderIR> ir_b;
670 // if (raw.HasProgramA()) { 609 // if (raw.HasProgramA()) {
@@ -673,9 +612,9 @@ bool ShaderCacheOpenGL::GenerateUnspecializedShaders(
673 612
674 UnspecializedShader unspecialized; 613 UnspecializedShader unspecialized;
675 unspecialized.entries = GLShader::GetEntries(ir); 614 unspecialized.entries = GLShader::GetEntries(ir);
676 unspecialized.program_type = raw.GetProgramType(); 615 unspecialized.type = raw.GetType();
677 unspecialized.code = raw.GetProgramCode(); 616 unspecialized.code = raw.GetCode();
678 unspecialized.code_b = raw.GetProgramCodeB(); 617 unspecialized.code_b = raw.GetCodeB();
679 unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized); 618 unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized);
680 619
681 if (callback) { 620 if (callback) {
@@ -708,7 +647,8 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
708 code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b)); 647 code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b));
709 } 648 }
710 649
711 const auto unique_identifier = GetUniqueIdentifier(GetProgramType(program), code, code_b); 650 const auto unique_identifier = GetUniqueIdentifier(
651 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
712 const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); 652 const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
713 const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; 653 const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
714 const ShaderParameters params{system, disk_cache, precompiled_variants, device, 654 const ShaderParameters params{system, disk_cache, precompiled_variants, device,
@@ -736,7 +676,7 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
736 676
737 // No kernel found - create a new one 677 // No kernel found - create a new one
738 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; 678 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
739 const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; 679 const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code, {})};
740 const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); 680 const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
741 const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; 681 const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
742 const ShaderParameters params{system, disk_cache, precompiled_variants, device, 682 const ShaderParameters params{system, disk_cache, precompiled_variants, device,
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 6bd7c9cf1..7b1470db3 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -17,6 +17,7 @@
17#include <glad/glad.h> 17#include <glad/glad.h>
18 18
19#include "common/common_types.h" 19#include "common/common_types.h"
20#include "video_core/engines/shader_type.h"
20#include "video_core/rasterizer_cache.h" 21#include "video_core/rasterizer_cache.h"
21#include "video_core/renderer_opengl/gl_resource_manager.h" 22#include "video_core/renderer_opengl/gl_resource_manager.h"
22#include "video_core/renderer_opengl/gl_shader_decompiler.h" 23#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -47,7 +48,7 @@ using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>;
47 48
48struct UnspecializedShader { 49struct UnspecializedShader {
49 GLShader::ShaderEntries entries; 50 GLShader::ShaderEntries entries;
50 ProgramType program_type; 51 Tegra::Engines::ShaderType type;
51 ProgramCode code; 52 ProgramCode code;
52 ProgramCode code_b; 53 ProgramCode code_b;
53}; 54};
@@ -77,7 +78,7 @@ public:
77 } 78 }
78 79
79 std::size_t GetSizeInBytes() const override { 80 std::size_t GetSizeInBytes() const override {
80 return program_code.size() * sizeof(u64); 81 return code.size() * sizeof(u64);
81 } 82 }
82 83
83 /// Gets the shader entries for the shader 84 /// Gets the shader entries for the shader
@@ -86,7 +87,7 @@ public:
86 } 87 }
87 88
88 /// Gets the GL program handle for the shader 89 /// Gets the GL program handle for the shader
89 std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); 90 GLuint GetHandle(const ProgramVariant& variant);
90 91
91private: 92private:
92 struct LockerVariant { 93 struct LockerVariant {
@@ -94,11 +95,11 @@ private:
94 std::unordered_map<ProgramVariant, CachedProgram> programs; 95 std::unordered_map<ProgramVariant, CachedProgram> programs;
95 }; 96 };
96 97
97 explicit CachedShader(const ShaderParameters& params, ProgramType program_type, 98 explicit CachedShader(const ShaderParameters& params, Tegra::Engines::ShaderType shader_type,
98 GLShader::ShaderEntries entries, ProgramCode program_code, 99 GLShader::ShaderEntries entries, ProgramCode program_code,
99 ProgramCode program_code_b); 100 ProgramCode program_code_b);
100 101
101 void UpdateVariant(); 102 bool EnsureValidLockerVariant();
102 103
103 ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant, 104 ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant,
104 const VideoCommon::Shader::ConstBufferLocker& locker) const; 105 const VideoCommon::Shader::ConstBufferLocker& locker) const;
@@ -110,14 +111,14 @@ private:
110 VAddr cpu_addr{}; 111 VAddr cpu_addr{};
111 112
112 u64 unique_identifier{}; 113 u64 unique_identifier{};
113 ProgramType program_type{}; 114 Tegra::Engines::ShaderType shader_type{};
114 115
115 GLShader::ShaderEntries entries; 116 GLShader::ShaderEntries entries;
116 117
117 ProgramCode program_code; 118 ProgramCode code;
118 ProgramCode program_code_b; 119 ProgramCode code_b;
119 120
120 LockerVariant* curr_variant = nullptr; 121 LockerVariant* curr_locker_variant = nullptr;
121 std::vector<std::unique_ptr<LockerVariant>> locker_variants; 122 std::vector<std::unique_ptr<LockerVariant>> locker_variants;
122}; 123};
123 124
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 4f2b49170..b17c4e703 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -16,6 +16,7 @@
16#include "common/common_types.h" 16#include "common/common_types.h"
17#include "common/logging/log.h" 17#include "common/logging/log.h"
18#include "video_core/engines/maxwell_3d.h" 18#include "video_core/engines/maxwell_3d.h"
19#include "video_core/engines/shader_type.h"
19#include "video_core/renderer_opengl/gl_device.h" 20#include "video_core/renderer_opengl/gl_device.h"
20#include "video_core/renderer_opengl/gl_rasterizer.h" 21#include "video_core/renderer_opengl/gl_rasterizer.h"
21#include "video_core/renderer_opengl/gl_shader_decompiler.h" 22#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -27,6 +28,7 @@ namespace OpenGL::GLShader {
27 28
28namespace { 29namespace {
29 30
31using Tegra::Engines::ShaderType;
30using Tegra::Shader::Attribute; 32using Tegra::Shader::Attribute;
31using Tegra::Shader::AttributeUse; 33using Tegra::Shader::AttributeUse;
32using Tegra::Shader::Header; 34using Tegra::Shader::Header;
@@ -41,6 +43,9 @@ using namespace VideoCommon::Shader;
41using Maxwell = Tegra::Engines::Maxwell3D::Regs; 43using Maxwell = Tegra::Engines::Maxwell3D::Regs;
42using Operation = const OperationNode&; 44using Operation = const OperationNode&;
43 45
46class ASTDecompiler;
47class ExprDecompiler;
48
44enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; 49enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
45 50
46struct TextureAoffi {}; 51struct TextureAoffi {};
@@ -223,7 +228,7 @@ private:
223 Type type{}; 228 Type type{};
224}; 229};
225 230
226constexpr const char* GetTypeString(Type type) { 231const char* GetTypeString(Type type) {
227 switch (type) { 232 switch (type) {
228 case Type::Bool: 233 case Type::Bool:
229 return "bool"; 234 return "bool";
@@ -243,7 +248,7 @@ constexpr const char* GetTypeString(Type type) {
243 } 248 }
244} 249}
245 250
246constexpr const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { 251const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
247 switch (image_type) { 252 switch (image_type) {
248 case Tegra::Shader::ImageType::Texture1D: 253 case Tegra::Shader::ImageType::Texture1D:
249 return "1D"; 254 return "1D";
@@ -331,16 +336,13 @@ std::string FlowStackTopName(MetaStackClass stack) {
331 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); 336 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
332} 337}
333 338
334constexpr bool IsVertexShader(ProgramType stage) { 339[[deprecated]] constexpr bool IsVertexShader(ShaderType stage) {
335 return stage == ProgramType::VertexA || stage == ProgramType::VertexB; 340 return stage == ShaderType::Vertex;
336} 341}
337 342
338class ASTDecompiler;
339class ExprDecompiler;
340
341class GLSLDecompiler final { 343class GLSLDecompiler final {
342public: 344public:
343 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ProgramType stage, 345 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage,
344 std::string suffix) 346 std::string suffix)
345 : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} 347 : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
346 348
@@ -427,7 +429,7 @@ private:
427 } 429 }
428 430
429 void DeclareGeometry() { 431 void DeclareGeometry() {
430 if (stage != ProgramType::Geometry) { 432 if (stage != ShaderType::Geometry) {
431 return; 433 return;
432 } 434 }
433 435
@@ -510,10 +512,14 @@ private:
510 } 512 }
511 513
512 void DeclareLocalMemory() { 514 void DeclareLocalMemory() {
513 // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at 515 if (stage == ShaderType::Compute) {
514 // specialization time. 516 code.AddLine("#ifdef LOCAL_MEMORY_SIZE");
515 const u64 local_memory_size = 517 code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory());
516 stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize(); 518 code.AddLine("#endif");
519 return;
520 }
521
522 const u64 local_memory_size = header.GetLocalMemorySize();
517 if (local_memory_size == 0) { 523 if (local_memory_size == 0) {
518 return; 524 return;
519 } 525 }
@@ -522,13 +528,6 @@ private:
522 code.AddNewLine(); 528 code.AddNewLine();
523 } 529 }
524 530
525 void DeclareSharedMemory() {
526 if (stage != ProgramType::Compute) {
527 return;
528 }
529 code.AddLine("shared uint {}[];", GetSharedMemory());
530 }
531
532 void DeclareInternalFlags() { 531 void DeclareInternalFlags() {
533 for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { 532 for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) {
534 const auto flag_code = static_cast<InternalFlag>(flag); 533 const auto flag_code = static_cast<InternalFlag>(flag);
@@ -578,12 +577,12 @@ private:
578 const u32 location{GetGenericAttributeIndex(index)}; 577 const u32 location{GetGenericAttributeIndex(index)};
579 578
580 std::string name{GetInputAttribute(index)}; 579 std::string name{GetInputAttribute(index)};
581 if (stage == ProgramType::Geometry) { 580 if (stage == ShaderType::Geometry) {
582 name = "gs_" + name + "[]"; 581 name = "gs_" + name + "[]";
583 } 582 }
584 583
585 std::string suffix; 584 std::string suffix;
586 if (stage == ProgramType::Fragment) { 585 if (stage == ShaderType::Fragment) {
587 const auto input_mode{header.ps.GetAttributeUse(location)}; 586 const auto input_mode{header.ps.GetAttributeUse(location)};
588 if (skip_unused && input_mode == AttributeUse::Unused) { 587 if (skip_unused && input_mode == AttributeUse::Unused) {
589 return; 588 return;
@@ -595,7 +594,7 @@ private:
595 } 594 }
596 595
597 void DeclareOutputAttributes() { 596 void DeclareOutputAttributes() {
598 if (ir.HasPhysicalAttributes() && stage != ProgramType::Fragment) { 597 if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) {
599 for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { 598 for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) {
600 DeclareOutputAttribute(ToGenericAttribute(i)); 599 DeclareOutputAttribute(ToGenericAttribute(i));
601 } 600 }
@@ -620,9 +619,9 @@ private:
620 } 619 }
621 620
622 void DeclareConstantBuffers() { 621 void DeclareConstantBuffers() {
623 for (const auto& entry : ir.GetConstantBuffers()) { 622 u32 binding = device.GetBaseBindings(stage).uniform_buffer;
624 const auto [index, size] = entry; 623 for (const auto& [index, cbuf] : ir.GetConstantBuffers()) {
625 code.AddLine("layout (std140, binding = CBUF_BINDING_{}) uniform {} {{", index, 624 code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++,
626 GetConstBufferBlock(index)); 625 GetConstBufferBlock(index));
627 code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS); 626 code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS);
628 code.AddLine("}};"); 627 code.AddLine("}};");
@@ -631,9 +630,8 @@ private:
631 } 630 }
632 631
633 void DeclareGlobalMemory() { 632 void DeclareGlobalMemory() {
634 for (const auto& gmem : ir.GetGlobalMemory()) { 633 u32 binding = device.GetBaseBindings(stage).shader_storage_buffer;
635 const auto& [base, usage] = gmem; 634 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
636
637 // Since we don't know how the shader will use the shader, hint the driver to disable as 635 // Since we don't know how the shader will use the shader, hint the driver to disable as
638 // much optimizations as possible 636 // much optimizations as possible
639 std::string qualifier = "coherent volatile"; 637 std::string qualifier = "coherent volatile";
@@ -643,8 +641,8 @@ private:
643 qualifier += " writeonly"; 641 qualifier += " writeonly";
644 } 642 }
645 643
646 code.AddLine("layout (std430, binding = GMEM_BINDING_{}_{}) {} buffer {} {{", 644 code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier,
647 base.cbuf_index, base.cbuf_offset, qualifier, GetGlobalMemoryBlock(base)); 645 GetGlobalMemoryBlock(base));
648 code.AddLine(" uint {}[];", GetGlobalMemory(base)); 646 code.AddLine(" uint {}[];", GetGlobalMemory(base));
649 code.AddLine("}};"); 647 code.AddLine("}};");
650 code.AddNewLine(); 648 code.AddNewLine();
@@ -652,15 +650,17 @@ private:
652 } 650 }
653 651
654 void DeclareSamplers() { 652 void DeclareSamplers() {
655 const auto& samplers = ir.GetSamplers(); 653 u32 binding = device.GetBaseBindings(stage).sampler;
656 for (const auto& sampler : samplers) { 654 for (const auto& sampler : ir.GetSamplers()) {
657 const std::string name{GetSampler(sampler)}; 655 const std::string name = GetSampler(sampler);
658 const std::string description{"layout (binding = SAMPLER_BINDING_" + 656 const std::string description = fmt::format("layout (binding = {}) uniform", binding++);
659 std::to_string(sampler.GetIndex()) + ") uniform"}; 657
660 std::string sampler_type = [&]() { 658 std::string sampler_type = [&]() {
659 if (sampler.IsBuffer()) {
660 return "samplerBuffer";
661 }
661 switch (sampler.GetType()) { 662 switch (sampler.GetType()) {
662 case Tegra::Shader::TextureType::Texture1D: 663 case Tegra::Shader::TextureType::Texture1D:
663 // Special cased, read below.
664 return "sampler1D"; 664 return "sampler1D";
665 case Tegra::Shader::TextureType::Texture2D: 665 case Tegra::Shader::TextureType::Texture2D:
666 return "sampler2D"; 666 return "sampler2D";
@@ -680,21 +680,9 @@ private:
680 sampler_type += "Shadow"; 680 sampler_type += "Shadow";
681 } 681 }
682 682
683 if (sampler.GetType() == Tegra::Shader::TextureType::Texture1D) { 683 code.AddLine("{} {} {};", description, sampler_type, name);
684 // 1D textures can be aliased to texture buffers, hide the declarations behind a
685 // preprocessor flag and use one or the other from the GPU state. This has to be
686 // done because shaders don't have enough information to determine the texture type.
687 EmitIfdefIsBuffer(sampler);
688 code.AddLine("{} samplerBuffer {};", description, name);
689 code.AddLine("#else");
690 code.AddLine("{} {} {};", description, sampler_type, name);
691 code.AddLine("#endif");
692 } else {
693 // The other texture types (2D, 3D and cubes) don't have this issue.
694 code.AddLine("{} {} {};", description, sampler_type, name);
695 }
696 } 684 }
697 if (!samplers.empty()) { 685 if (!ir.GetSamplers().empty()) {
698 code.AddNewLine(); 686 code.AddNewLine();
699 } 687 }
700 } 688 }
@@ -717,7 +705,7 @@ private:
717 constexpr u32 element_stride = 4; 705 constexpr u32 element_stride = 4;
718 const u32 address{generic_base + index * generic_stride + element * element_stride}; 706 const u32 address{generic_base + index * generic_stride + element * element_stride};
719 707
720 const bool declared = stage != ProgramType::Fragment || 708 const bool declared = stage != ShaderType::Fragment ||
721 header.ps.GetAttributeUse(index) != AttributeUse::Unused; 709 header.ps.GetAttributeUse(index) != AttributeUse::Unused;
722 const std::string value = 710 const std::string value =
723 declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f"; 711 declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f";
@@ -734,8 +722,8 @@ private:
734 } 722 }
735 723
736 void DeclareImages() { 724 void DeclareImages() {
737 const auto& images{ir.GetImages()}; 725 u32 binding = device.GetBaseBindings(stage).image;
738 for (const auto& image : images) { 726 for (const auto& image : ir.GetImages()) {
739 std::string qualifier = "coherent volatile"; 727 std::string qualifier = "coherent volatile";
740 if (image.IsRead() && !image.IsWritten()) { 728 if (image.IsRead() && !image.IsWritten()) {
741 qualifier += " readonly"; 729 qualifier += " readonly";
@@ -745,10 +733,10 @@ private:
745 733
746 const char* format = image.IsAtomic() ? "r32ui, " : ""; 734 const char* format = image.IsAtomic() ? "r32ui, " : "";
747 const char* type_declaration = GetImageTypeDeclaration(image.GetType()); 735 const char* type_declaration = GetImageTypeDeclaration(image.GetType());
748 code.AddLine("layout ({}binding = IMAGE_BINDING_{}) {} uniform uimage{} {};", format, 736 code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++,
749 image.GetIndex(), qualifier, type_declaration, GetImage(image)); 737 qualifier, type_declaration, GetImage(image));
750 } 738 }
751 if (!images.empty()) { 739 if (!ir.GetImages().empty()) {
752 code.AddNewLine(); 740 code.AddNewLine();
753 } 741 }
754 } 742 }
@@ -809,7 +797,7 @@ private:
809 } 797 }
810 798
811 if (const auto abuf = std::get_if<AbufNode>(&*node)) { 799 if (const auto abuf = std::get_if<AbufNode>(&*node)) {
812 UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry, 800 UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry,
813 "Physical attributes in geometry shaders are not implemented"); 801 "Physical attributes in geometry shaders are not implemented");
814 if (abuf->IsPhysicalBuffer()) { 802 if (abuf->IsPhysicalBuffer()) {
815 return {fmt::format("ReadPhysicalAttribute({})", 803 return {fmt::format("ReadPhysicalAttribute({})",
@@ -868,18 +856,13 @@ private:
868 } 856 }
869 857
870 if (const auto lmem = std::get_if<LmemNode>(&*node)) { 858 if (const auto lmem = std::get_if<LmemNode>(&*node)) {
871 if (stage == ProgramType::Compute) {
872 LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
873 }
874 return { 859 return {
875 fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), 860 fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
876 Type::Uint}; 861 Type::Uint};
877 } 862 }
878 863
879 if (const auto smem = std::get_if<SmemNode>(&*node)) { 864 if (const auto smem = std::get_if<SmemNode>(&*node)) {
880 return { 865 return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
881 fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()),
882 Type::Uint};
883 } 866 }
884 867
885 if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { 868 if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
@@ -909,7 +892,7 @@ private:
909 892
910 Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { 893 Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
911 const auto GeometryPass = [&](std::string_view name) { 894 const auto GeometryPass = [&](std::string_view name) {
912 if (stage == ProgramType::Geometry && buffer) { 895 if (stage == ShaderType::Geometry && buffer) {
913 // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games 896 // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
914 // set an 0x80000000 index for those and the shader fails to build. Find out why 897 // set an 0x80000000 index for those and the shader fails to build. Find out why
915 // this happens and what's its intent. 898 // this happens and what's its intent.
@@ -921,11 +904,11 @@ private:
921 switch (attribute) { 904 switch (attribute) {
922 case Attribute::Index::Position: 905 case Attribute::Index::Position:
923 switch (stage) { 906 switch (stage) {
924 case ProgramType::Geometry: 907 case ShaderType::Geometry:
925 return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(), 908 return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(),
926 GetSwizzle(element)), 909 GetSwizzle(element)),
927 Type::Float}; 910 Type::Float};
928 case ProgramType::Fragment: 911 case ShaderType::Fragment:
929 return {element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)), 912 return {element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)),
930 Type::Float}; 913 Type::Float};
931 default: 914 default:
@@ -959,7 +942,7 @@ private:
959 return {"0", Type::Int}; 942 return {"0", Type::Int};
960 case Attribute::Index::FrontFacing: 943 case Attribute::Index::FrontFacing:
961 // TODO(Subv): Find out what the values are for the other elements. 944 // TODO(Subv): Find out what the values are for the other elements.
962 ASSERT(stage == ProgramType::Fragment); 945 ASSERT(stage == ShaderType::Fragment);
963 switch (element) { 946 switch (element) {
964 case 3: 947 case 3:
965 return {"(gl_FrontFacing ? -1 : 0)", Type::Int}; 948 return {"(gl_FrontFacing ? -1 : 0)", Type::Int};
@@ -985,7 +968,7 @@ private:
985 // be found in fragment shaders, so we disable precise there. There are vertex shaders that 968 // be found in fragment shaders, so we disable precise there. There are vertex shaders that
986 // also fail to build but nobody seems to care about those. 969 // also fail to build but nobody seems to care about those.
987 // Note: Only bugged drivers will skip precise. 970 // Note: Only bugged drivers will skip precise.
988 const bool disable_precise = device.HasPreciseBug() && stage == ProgramType::Fragment; 971 const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment;
989 972
990 std::string temporary = code.GenerateTemporary(); 973 std::string temporary = code.GenerateTemporary();
991 code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type), 974 code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type),
@@ -1247,17 +1230,12 @@ private:
1247 } 1230 }
1248 target = std::move(*output); 1231 target = std::move(*output);
1249 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { 1232 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
1250 if (stage == ProgramType::Compute) {
1251 LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
1252 }
1253 target = { 1233 target = {
1254 fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), 1234 fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
1255 Type::Uint}; 1235 Type::Uint};
1256 } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { 1236 } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
1257 ASSERT(stage == ProgramType::Compute); 1237 ASSERT(stage == ShaderType::Compute);
1258 target = { 1238 target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint};
1259 fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()),
1260 Type::Uint};
1261 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { 1239 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
1262 const std::string real = Visit(gmem->GetRealAddress()).AsUint(); 1240 const std::string real = Visit(gmem->GetRealAddress()).AsUint();
1263 const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); 1241 const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
@@ -1749,27 +1727,14 @@ private:
1749 expr += ", "; 1727 expr += ", ";
1750 } 1728 }
1751 1729
1752 // Store a copy of the expression without the lod to be used with texture buffers 1730 if (meta->lod && !meta->sampler.IsBuffer()) {
1753 std::string expr_buffer = expr;
1754
1755 if (meta->lod) {
1756 expr += ", "; 1731 expr += ", ";
1757 expr += Visit(meta->lod).AsInt(); 1732 expr += Visit(meta->lod).AsInt();
1758 } 1733 }
1759 expr += ')'; 1734 expr += ')';
1760 expr += GetSwizzle(meta->element); 1735 expr += GetSwizzle(meta->element);
1761 1736
1762 expr_buffer += ')'; 1737 return {std::move(expr), Type::Float};
1763 expr_buffer += GetSwizzle(meta->element);
1764
1765 const std::string tmp{code.GenerateTemporary()};
1766 EmitIfdefIsBuffer(meta->sampler);
1767 code.AddLine("float {} = {};", tmp, expr_buffer);
1768 code.AddLine("#else");
1769 code.AddLine("float {} = {};", tmp, expr);
1770 code.AddLine("#endif");
1771
1772 return {tmp, Type::Float};
1773 } 1738 }
1774 1739
1775 Expression ImageLoad(Operation operation) { 1740 Expression ImageLoad(Operation operation) {
@@ -1837,7 +1802,7 @@ private:
1837 } 1802 }
1838 1803
1839 void PreExit() { 1804 void PreExit() {
1840 if (stage != ProgramType::Fragment) { 1805 if (stage != ShaderType::Fragment) {
1841 return; 1806 return;
1842 } 1807 }
1843 const auto& used_registers = ir.GetRegisters(); 1808 const auto& used_registers = ir.GetRegisters();
@@ -1890,14 +1855,14 @@ private:
1890 } 1855 }
1891 1856
1892 Expression EmitVertex(Operation operation) { 1857 Expression EmitVertex(Operation operation) {
1893 ASSERT_MSG(stage == ProgramType::Geometry, 1858 ASSERT_MSG(stage == ShaderType::Geometry,
1894 "EmitVertex is expected to be used in a geometry shader."); 1859 "EmitVertex is expected to be used in a geometry shader.");
1895 code.AddLine("EmitVertex();"); 1860 code.AddLine("EmitVertex();");
1896 return {}; 1861 return {};
1897 } 1862 }
1898 1863
1899 Expression EndPrimitive(Operation operation) { 1864 Expression EndPrimitive(Operation operation) {
1900 ASSERT_MSG(stage == ProgramType::Geometry, 1865 ASSERT_MSG(stage == ShaderType::Geometry,
1901 "EndPrimitive is expected to be used in a geometry shader."); 1866 "EndPrimitive is expected to be used in a geometry shader.");
1902 code.AddLine("EndPrimitive();"); 1867 code.AddLine("EndPrimitive();");
1903 return {}; 1868 return {};
@@ -2193,10 +2158,6 @@ private:
2193 return "lmem_" + suffix; 2158 return "lmem_" + suffix;
2194 } 2159 }
2195 2160
2196 std::string GetSharedMemory() const {
2197 return fmt::format("smem_{}", suffix);
2198 }
2199
2200 std::string GetInternalFlag(InternalFlag flag) const { 2161 std::string GetInternalFlag(InternalFlag flag) const {
2201 constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", 2162 constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag",
2202 "overflow_flag"}; 2163 "overflow_flag"};
@@ -2214,10 +2175,6 @@ private:
2214 return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image"); 2175 return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image");
2215 } 2176 }
2216 2177
2217 void EmitIfdefIsBuffer(const Sampler& sampler) {
2218 code.AddLine("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex());
2219 }
2220
2221 std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const { 2178 std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const {
2222 return fmt::format("{}_{}_{}", name, index, suffix); 2179 return fmt::format("{}_{}_{}", name, index, suffix);
2223 } 2180 }
@@ -2236,7 +2193,7 @@ private:
2236 2193
2237 const Device& device; 2194 const Device& device;
2238 const ShaderIR& ir; 2195 const ShaderIR& ir;
2239 const ProgramType stage; 2196 const ShaderType stage;
2240 const std::string suffix; 2197 const std::string suffix;
2241 const Header header; 2198 const Header header;
2242 2199
@@ -2491,7 +2448,7 @@ const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
2491)"; 2448)";
2492} 2449}
2493 2450
2494std::string Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, 2451std::string Decompile(const Device& device, const ShaderIR& ir, ShaderType stage,
2495 const std::string& suffix) { 2452 const std::string& suffix) {
2496 GLSLDecompiler decompiler(device, ir, stage, suffix); 2453 GLSLDecompiler decompiler(device, ir, stage, suffix);
2497 decompiler.Decompile(); 2454 decompiler.Decompile();
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index b1e75e6cc..7876f48d6 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -10,6 +10,7 @@
10#include <vector> 10#include <vector>
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/engines/shader_type.h"
13#include "video_core/shader/shader_ir.h" 14#include "video_core/shader/shader_ir.h"
14 15
15namespace VideoCommon::Shader { 16namespace VideoCommon::Shader {
@@ -17,20 +18,8 @@ class ShaderIR;
17} 18}
18 19
19namespace OpenGL { 20namespace OpenGL {
20
21class Device; 21class Device;
22 22}
23enum class ProgramType : u32 {
24 VertexA = 0,
25 VertexB = 1,
26 TessellationControl = 2,
27 TessellationEval = 3,
28 Geometry = 4,
29 Fragment = 5,
30 Compute = 6
31};
32
33} // namespace OpenGL
34 23
35namespace OpenGL::GLShader { 24namespace OpenGL::GLShader {
36 25
@@ -94,6 +83,6 @@ ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir);
94std::string GetCommonDeclarations(); 83std::string GetCommonDeclarations();
95 84
96std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, 85std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
97 ProgramType stage, const std::string& suffix); 86 Tegra::Engines::ShaderType stage, const std::string& suffix);
98 87
99} // namespace OpenGL::GLShader 88} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 184a565e6..cf874a09a 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring> 5#include <cstring>
6
6#include <fmt/format.h> 7#include <fmt/format.h>
7 8
8#include "common/assert.h" 9#include "common/assert.h"
@@ -12,50 +13,50 @@
12#include "common/logging/log.h" 13#include "common/logging/log.h"
13#include "common/scm_rev.h" 14#include "common/scm_rev.h"
14#include "common/zstd_compression.h" 15#include "common/zstd_compression.h"
15
16#include "core/core.h" 16#include "core/core.h"
17#include "core/hle/kernel/process.h" 17#include "core/hle/kernel/process.h"
18#include "core/settings.h" 18#include "core/settings.h"
19 19#include "video_core/engines/shader_type.h"
20#include "video_core/renderer_opengl/gl_shader_cache.h" 20#include "video_core/renderer_opengl/gl_shader_cache.h"
21#include "video_core/renderer_opengl/gl_shader_disk_cache.h" 21#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
22 22
23namespace OpenGL { 23namespace OpenGL {
24 24
25using Tegra::Engines::ShaderType;
25using VideoCommon::Shader::BindlessSamplerMap; 26using VideoCommon::Shader::BindlessSamplerMap;
26using VideoCommon::Shader::BoundSamplerMap; 27using VideoCommon::Shader::BoundSamplerMap;
27using VideoCommon::Shader::KeyMap; 28using VideoCommon::Shader::KeyMap;
28 29
29namespace { 30namespace {
30 31
32using ShaderCacheVersionHash = std::array<u8, 64>;
33
34enum class TransferableEntryKind : u32 {
35 Raw,
36 Usage,
37};
38
31struct ConstBufferKey { 39struct ConstBufferKey {
32 u32 cbuf; 40 u32 cbuf{};
33 u32 offset; 41 u32 offset{};
34 u32 value; 42 u32 value{};
35}; 43};
36 44
37struct BoundSamplerKey { 45struct BoundSamplerKey {
38 u32 offset; 46 u32 offset{};
39 Tegra::Engines::SamplerDescriptor sampler; 47 Tegra::Engines::SamplerDescriptor sampler{};
40}; 48};
41 49
42struct BindlessSamplerKey { 50struct BindlessSamplerKey {
43 u32 cbuf; 51 u32 cbuf{};
44 u32 offset; 52 u32 offset{};
45 Tegra::Engines::SamplerDescriptor sampler; 53 Tegra::Engines::SamplerDescriptor sampler{};
46};
47
48using ShaderCacheVersionHash = std::array<u8, 64>;
49
50enum class TransferableEntryKind : u32 {
51 Raw,
52 Usage,
53}; 54};
54 55
55constexpr u32 NativeVersion = 5; 56constexpr u32 NativeVersion = 11;
56 57
57// Making sure sizes doesn't change by accident 58// Making sure sizes doesn't change by accident
58static_assert(sizeof(BaseBindings) == 16); 59static_assert(sizeof(ProgramVariant) == 20);
59 60
60ShaderCacheVersionHash GetShaderCacheVersionHash() { 61ShaderCacheVersionHash GetShaderCacheVersionHash() {
61 ShaderCacheVersionHash hash{}; 62 ShaderCacheVersionHash hash{};
@@ -66,10 +67,10 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
66 67
67} // Anonymous namespace 68} // Anonymous namespace
68 69
69ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, 70ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ShaderType type, ProgramCode code,
70 ProgramCode program_code, ProgramCode program_code_b) 71 ProgramCode code_b)
71 : unique_identifier{unique_identifier}, program_type{program_type}, 72 : unique_identifier{unique_identifier}, type{type}, code{std::move(code)}, code_b{std::move(
72 program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {} 73 code_b)} {}
73 74
74ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; 75ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default;
75 76
@@ -77,42 +78,39 @@ ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default;
77 78
78bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) { 79bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
79 if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) || 80 if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) ||
80 file.ReadBytes(&program_type, sizeof(u32)) != sizeof(u32)) { 81 file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) {
81 return false; 82 return false;
82 } 83 }
83 u32 program_code_size{}; 84 u32 code_size{};
84 u32 program_code_size_b{}; 85 u32 code_size_b{};
85 if (file.ReadBytes(&program_code_size, sizeof(u32)) != sizeof(u32) || 86 if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) ||
86 file.ReadBytes(&program_code_size_b, sizeof(u32)) != sizeof(u32)) { 87 file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) {
87 return false; 88 return false;
88 } 89 }
89 90
90 program_code.resize(program_code_size); 91 code.resize(code_size);
91 program_code_b.resize(program_code_size_b); 92 code_b.resize(code_size_b);
92 93
93 if (file.ReadArray(program_code.data(), program_code_size) != program_code_size) 94 if (file.ReadArray(code.data(), code_size) != code_size)
94 return false; 95 return false;
95 96
96 if (HasProgramA() && 97 if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) {
97 file.ReadArray(program_code_b.data(), program_code_size_b) != program_code_size_b) {
98 return false; 98 return false;
99 } 99 }
100 return true; 100 return true;
101} 101}
102 102
103bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { 103bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
104 if (file.WriteObject(unique_identifier) != 1 || 104 if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(static_cast<u32>(type)) != 1 ||
105 file.WriteObject(static_cast<u32>(program_type)) != 1 || 105 file.WriteObject(static_cast<u32>(code.size())) != 1 ||
106 file.WriteObject(static_cast<u32>(program_code.size())) != 1 || 106 file.WriteObject(static_cast<u32>(code_b.size())) != 1) {
107 file.WriteObject(static_cast<u32>(program_code_b.size())) != 1) {
108 return false; 107 return false;
109 } 108 }
110 109
111 if (file.WriteArray(program_code.data(), program_code.size()) != program_code.size()) 110 if (file.WriteArray(code.data(), code.size()) != code.size())
112 return false; 111 return false;
113 112
114 if (HasProgramA() && 113 if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) {
115 file.WriteArray(program_code_b.data(), program_code_b.size()) != program_code_b.size()) {
116 return false; 114 return false;
117 } 115 }
118 return true; 116 return true;
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index db23ada93..69a2fbdda 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -4,7 +4,6 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <bitset>
8#include <optional> 7#include <optional>
9#include <string> 8#include <string>
10#include <tuple> 9#include <tuple>
@@ -19,6 +18,7 @@
19#include "common/assert.h" 18#include "common/assert.h"
20#include "common/common_types.h" 19#include "common/common_types.h"
21#include "core/file_sys/vfs_vector.h" 20#include "core/file_sys/vfs_vector.h"
21#include "video_core/engines/shader_type.h"
22#include "video_core/renderer_opengl/gl_shader_gen.h" 22#include "video_core/renderer_opengl/gl_shader_gen.h"
23#include "video_core/shader/const_buffer_locker.h" 23#include "video_core/shader/const_buffer_locker.h"
24 24
@@ -37,42 +37,42 @@ struct ShaderDiskCacheDump;
37 37
38using ProgramCode = std::vector<u64>; 38using ProgramCode = std::vector<u64>;
39using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; 39using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
40using TextureBufferUsage = std::bitset<64>;
41
42/// Allocated bindings used by an OpenGL shader program
43struct BaseBindings {
44 u32 cbuf{};
45 u32 gmem{};
46 u32 sampler{};
47 u32 image{};
48
49 bool operator==(const BaseBindings& rhs) const {
50 return std::tie(cbuf, gmem, sampler, image) ==
51 std::tie(rhs.cbuf, rhs.gmem, rhs.sampler, rhs.image);
52 }
53 40
54 bool operator!=(const BaseBindings& rhs) const { 41/// Describes the different variants a program can be compiled with.
55 return !operator==(rhs); 42struct ProgramVariant final {
56 } 43 ProgramVariant() = default;
57}; 44
58static_assert(std::is_trivially_copyable_v<BaseBindings>); 45 /// Graphics constructor.
46 explicit constexpr ProgramVariant(GLenum primitive_mode) noexcept
47 : primitive_mode{primitive_mode} {}
48
49 /// Compute constructor.
50 explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size,
51 u32 local_memory_size) noexcept
52 : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)},
53 shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {}
59 54
60/// Describes the different variants a single program can be compiled. 55 // Graphics specific parameters.
61struct ProgramVariant {
62 BaseBindings base_bindings;
63 GLenum primitive_mode{}; 56 GLenum primitive_mode{};
64 TextureBufferUsage texture_buffer_usage{};
65 57
66 bool operator==(const ProgramVariant& rhs) const { 58 // Compute specific parameters.
67 return std::tie(base_bindings, primitive_mode, texture_buffer_usage) == 59 u32 block_x{};
68 std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.texture_buffer_usage); 60 u16 block_y{};
61 u16 block_z{};
62 u32 shared_memory_size{};
63 u32 local_memory_size{};
64
65 bool operator==(const ProgramVariant& rhs) const noexcept {
66 return std::tie(primitive_mode, block_x, block_y, block_z, shared_memory_size,
67 local_memory_size) == std::tie(rhs.primitive_mode, rhs.block_x, rhs.block_y,
68 rhs.block_z, rhs.shared_memory_size,
69 rhs.local_memory_size);
69 } 70 }
70 71
71 bool operator!=(const ProgramVariant& rhs) const { 72 bool operator!=(const ProgramVariant& rhs) const noexcept {
72 return !operator==(rhs); 73 return !operator==(rhs);
73 } 74 }
74}; 75};
75
76static_assert(std::is_trivially_copyable_v<ProgramVariant>); 76static_assert(std::is_trivially_copyable_v<ProgramVariant>);
77 77
78/// Describes how a shader is used. 78/// Describes how a shader is used.
@@ -99,21 +99,14 @@ struct ShaderDiskCacheUsage {
99namespace std { 99namespace std {
100 100
101template <> 101template <>
102struct hash<OpenGL::BaseBindings> {
103 std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept {
104 return static_cast<std::size_t>(bindings.cbuf) ^
105 (static_cast<std::size_t>(bindings.gmem) << 8) ^
106 (static_cast<std::size_t>(bindings.sampler) << 16) ^
107 (static_cast<std::size_t>(bindings.image) << 24);
108 }
109};
110
111template <>
112struct hash<OpenGL::ProgramVariant> { 102struct hash<OpenGL::ProgramVariant> {
113 std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept { 103 std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept {
114 return std::hash<OpenGL::BaseBindings>()(variant.base_bindings) ^ 104 return (static_cast<std::size_t>(variant.primitive_mode) << 6) ^
115 std::hash<OpenGL::TextureBufferUsage>()(variant.texture_buffer_usage) ^ 105 static_cast<std::size_t>(variant.block_x) ^
116 (static_cast<std::size_t>(variant.primitive_mode) << 6); 106 (static_cast<std::size_t>(variant.block_y) << 32) ^
107 (static_cast<std::size_t>(variant.block_z) << 48) ^
108 (static_cast<std::size_t>(variant.shared_memory_size) << 16) ^
109 (static_cast<std::size_t>(variant.local_memory_size) << 36);
117 } 110 }
118}; 111};
119 112
@@ -121,7 +114,7 @@ template <>
121struct hash<OpenGL::ShaderDiskCacheUsage> { 114struct hash<OpenGL::ShaderDiskCacheUsage> {
122 std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept { 115 std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {
123 return static_cast<std::size_t>(usage.unique_identifier) ^ 116 return static_cast<std::size_t>(usage.unique_identifier) ^
124 std::hash<OpenGL::ProgramVariant>()(usage.variant); 117 std::hash<OpenGL::ProgramVariant>{}(usage.variant);
125 } 118 }
126}; 119};
127 120
@@ -132,8 +125,8 @@ namespace OpenGL {
132/// Describes a shader how it's used by the guest GPU 125/// Describes a shader how it's used by the guest GPU
133class ShaderDiskCacheRaw { 126class ShaderDiskCacheRaw {
134public: 127public:
135 explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, 128 explicit ShaderDiskCacheRaw(u64 unique_identifier, Tegra::Engines::ShaderType type,
136 ProgramCode program_code, ProgramCode program_code_b = {}); 129 ProgramCode code, ProgramCode code_b = {});
137 ShaderDiskCacheRaw(); 130 ShaderDiskCacheRaw();
138 ~ShaderDiskCacheRaw(); 131 ~ShaderDiskCacheRaw();
139 132
@@ -146,27 +139,26 @@ public:
146 } 139 }
147 140
148 bool HasProgramA() const { 141 bool HasProgramA() const {
149 return program_type == ProgramType::VertexA; 142 return !code.empty() && !code_b.empty();
150 } 143 }
151 144
152 ProgramType GetProgramType() const { 145 Tegra::Engines::ShaderType GetType() const {
153 return program_type; 146 return type;
154 } 147 }
155 148
156 const ProgramCode& GetProgramCode() const { 149 const ProgramCode& GetCode() const {
157 return program_code; 150 return code;
158 } 151 }
159 152
160 const ProgramCode& GetProgramCodeB() const { 153 const ProgramCode& GetCodeB() const {
161 return program_code_b; 154 return code_b;
162 } 155 }
163 156
164private: 157private:
165 u64 unique_identifier{}; 158 u64 unique_identifier{};
166 ProgramType program_type{}; 159 Tegra::Engines::ShaderType type{};
167 160 ProgramCode code;
168 ProgramCode program_code; 161 ProgramCode code_b;
169 ProgramCode program_code_b;
170}; 162};
171 163
172/// Contains an OpenGL dumped binary program 164/// Contains an OpenGL dumped binary program
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index a63c1a6b8..34946fb47 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -2,8 +2,13 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <string>
6
5#include <fmt/format.h> 7#include <fmt/format.h>
8
6#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/engines/shader_type.h"
11#include "video_core/renderer_opengl/gl_device.h"
7#include "video_core/renderer_opengl/gl_shader_decompiler.h" 12#include "video_core/renderer_opengl/gl_shader_decompiler.h"
8#include "video_core/renderer_opengl/gl_shader_gen.h" 13#include "video_core/renderer_opengl/gl_shader_gen.h"
9#include "video_core/shader/shader_ir.h" 14#include "video_core/shader/shader_ir.h"
@@ -11,6 +16,7 @@
11namespace OpenGL::GLShader { 16namespace OpenGL::GLShader {
12 17
13using Tegra::Engines::Maxwell3D; 18using Tegra::Engines::Maxwell3D;
19using Tegra::Engines::ShaderType;
14using VideoCommon::Shader::CompileDepth; 20using VideoCommon::Shader::CompileDepth;
15using VideoCommon::Shader::CompilerSettings; 21using VideoCommon::Shader::CompilerSettings;
16using VideoCommon::Shader::ProgramCode; 22using VideoCommon::Shader::ProgramCode;
@@ -18,16 +24,16 @@ using VideoCommon::Shader::ShaderIR;
18 24
19std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) { 25std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) {
20 std::string out = GetCommonDeclarations(); 26 std::string out = GetCommonDeclarations();
21 out += R"( 27 out += fmt::format(R"(
22layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { 28layout (std140, binding = {}) uniform vs_config {{
23 float y_direction; 29 float y_direction;
24}; 30}};
25 31
26)"; 32)",
27 const auto stage = ir_b ? ProgramType::VertexA : ProgramType::VertexB; 33 EmulationUniformBlockBinding);
28 out += Decompile(device, ir, stage, "vertex"); 34 out += Decompile(device, ir, ShaderType::Vertex, "vertex");
29 if (ir_b) { 35 if (ir_b) {
30 out += Decompile(device, *ir_b, ProgramType::VertexB, "vertex_b"); 36 out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b");
31 } 37 }
32 38
33 out += R"( 39 out += R"(
@@ -44,13 +50,14 @@ void main() {
44 50
45std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) { 51std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) {
46 std::string out = GetCommonDeclarations(); 52 std::string out = GetCommonDeclarations();
47 out += R"( 53 out += fmt::format(R"(
48layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { 54layout (std140, binding = {}) uniform gs_config {{
49 float y_direction; 55 float y_direction;
50}; 56}};
51 57
52)"; 58)",
53 out += Decompile(device, ir, ProgramType::Geometry, "geometry"); 59 EmulationUniformBlockBinding);
60 out += Decompile(device, ir, ShaderType::Geometry, "geometry");
54 61
55 out += R"( 62 out += R"(
56void main() { 63void main() {
@@ -62,7 +69,7 @@ void main() {
62 69
63std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) { 70std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) {
64 std::string out = GetCommonDeclarations(); 71 std::string out = GetCommonDeclarations();
65 out += R"( 72 out += fmt::format(R"(
66layout (location = 0) out vec4 FragColor0; 73layout (location = 0) out vec4 FragColor0;
67layout (location = 1) out vec4 FragColor1; 74layout (location = 1) out vec4 FragColor1;
68layout (location = 2) out vec4 FragColor2; 75layout (location = 2) out vec4 FragColor2;
@@ -72,12 +79,13 @@ layout (location = 5) out vec4 FragColor5;
72layout (location = 6) out vec4 FragColor6; 79layout (location = 6) out vec4 FragColor6;
73layout (location = 7) out vec4 FragColor7; 80layout (location = 7) out vec4 FragColor7;
74 81
75layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { 82layout (std140, binding = {}) uniform fs_config {{
76 float y_direction; 83 float y_direction;
77}; 84}};
78 85
79)"; 86)",
80 out += Decompile(device, ir, ProgramType::Fragment, "fragment"); 87 EmulationUniformBlockBinding);
88 out += Decompile(device, ir, ShaderType::Fragment, "fragment");
81 89
82 out += R"( 90 out += R"(
83void main() { 91void main() {
@@ -89,7 +97,7 @@ void main() {
89 97
90std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) { 98std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) {
91 std::string out = GetCommonDeclarations(); 99 std::string out = GetCommonDeclarations();
92 out += Decompile(device, ir, ProgramType::Compute, "compute"); 100 out += Decompile(device, ir, ShaderType::Compute, "compute");
93 out += R"( 101 out += R"(
94void main() { 102void main() {
95 execute_compute(); 103 execute_compute();
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index ccbe5912e..39b3986d3 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -417,14 +417,24 @@ void OpenGLState::ApplyClipControl() {
417} 417}
418 418
419void OpenGLState::ApplyTextures() { 419void OpenGLState::ApplyTextures() {
420 if (const auto update = UpdateArray(cur_state.textures, textures)) { 420 const std::size_t size = std::size(textures);
421 glBindTextures(update->first, update->second, textures.data() + update->first); 421 for (std::size_t i = 0; i < size; ++i) {
422 if (UpdateValue(cur_state.textures[i], textures[i])) {
423 // BindTextureUnit doesn't support binding null textures, skip those binds.
424 // TODO(Rodrigo): Stop using null textures
425 if (textures[i] != 0) {
426 glBindTextureUnit(static_cast<GLuint>(i), textures[i]);
427 }
428 }
422 } 429 }
423} 430}
424 431
425void OpenGLState::ApplySamplers() { 432void OpenGLState::ApplySamplers() {
426 if (const auto update = UpdateArray(cur_state.samplers, samplers)) { 433 const std::size_t size = std::size(samplers);
427 glBindSamplers(update->first, update->second, samplers.data() + update->first); 434 for (std::size_t i = 0; i < size; ++i) {
435 if (UpdateValue(cur_state.samplers[i], samplers[i])) {
436 glBindSampler(static_cast<GLuint>(i), samplers[i]);
437 }
428 } 438 }
429} 439}
430 440
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index eaff22bda..e53c2c5f2 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -96,9 +96,11 @@ public:
96 GLenum operation = GL_COPY; 96 GLenum operation = GL_COPY;
97 } logic_op; 97 } logic_op;
98 98
99 std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures = {}; 99 static constexpr std::size_t NumSamplers = 32 * 5;
100 std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers = {}; 100 static constexpr std::size_t NumImages = 8 * 5;
101 std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images = {}; 101 std::array<GLuint, NumSamplers> textures = {};
102 std::array<GLuint, NumSamplers> samplers = {};
103 std::array<GLuint, NumImages> images = {};
102 104
103 struct { 105 struct {
104 GLuint read_framebuffer = 0; // GL_READ_FRAMEBUFFER_BINDING 106 GLuint read_framebuffer = 0; // GL_READ_FRAMEBUFFER_BINDING
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index c504a2c1a..9770dda1c 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -3,7 +3,10 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <string> 5#include <string>
6#include <vector>
7
6#include <fmt/format.h> 8#include <fmt/format.h>
9
7#include <glad/glad.h> 10#include <glad/glad.h>
8 11
9#include "common/assert.h" 12#include "common/assert.h"
@@ -48,34 +51,19 @@ BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{t
48 51
49BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; 52BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
50 53
51void BindBuffersRangePushBuffer::Setup(GLuint first_) { 54void BindBuffersRangePushBuffer::Setup() {
52 first = first_; 55 entries.clear();
53 buffer_pointers.clear();
54 offsets.clear();
55 sizes.clear();
56} 56}
57 57
58void BindBuffersRangePushBuffer::Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) { 58void BindBuffersRangePushBuffer::Push(GLuint binding, const GLuint* buffer, GLintptr offset,
59 buffer_pointers.push_back(buffer); 59 GLsizeiptr size) {
60 offsets.push_back(offset); 60 entries.push_back(Entry{binding, buffer, offset, size});
61 sizes.push_back(size);
62} 61}
63 62
64void BindBuffersRangePushBuffer::Bind() { 63void BindBuffersRangePushBuffer::Bind() {
65 // Ensure sizes are valid. 64 for (const Entry& entry : entries) {
66 const std::size_t count{buffer_pointers.size()}; 65 glBindBufferRange(target, entry.binding, *entry.buffer, entry.offset, entry.size);
67 DEBUG_ASSERT(count == offsets.size() && count == sizes.size());
68 if (count == 0) {
69 return;
70 } 66 }
71
72 // Dereference buffers.
73 buffers.resize(count);
74 std::transform(buffer_pointers.begin(), buffer_pointers.end(), buffers.begin(),
75 [](const GLuint* pointer) { return *pointer; });
76
77 glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(),
78 sizes.data());
79} 67}
80 68
81void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { 69void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) {
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index 6c2b45546..d56153fe7 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -43,20 +43,22 @@ public:
43 explicit BindBuffersRangePushBuffer(GLenum target); 43 explicit BindBuffersRangePushBuffer(GLenum target);
44 ~BindBuffersRangePushBuffer(); 44 ~BindBuffersRangePushBuffer();
45 45
46 void Setup(GLuint first_); 46 void Setup();
47 47
48 void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size); 48 void Push(GLuint binding, const GLuint* buffer, GLintptr offset, GLsizeiptr size);
49 49
50 void Bind(); 50 void Bind();
51 51
52private: 52private:
53 GLenum target{}; 53 struct Entry {
54 GLuint first{}; 54 GLuint binding;
55 std::vector<const GLuint*> buffer_pointers; 55 const GLuint* buffer;
56 GLintptr offset;
57 GLsizeiptr size;
58 };
56 59
57 std::vector<GLuint> buffers; 60 GLenum target;
58 std::vector<GLintptr> offsets; 61 std::vector<Entry> entries;
59 std::vector<GLsizeiptr> sizes;
60}; 62};
61 63
62void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); 64void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {});
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 463ed43ae..7f0eb6b74 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -13,6 +13,8 @@
13 13
14namespace Vulkan::MaxwellToVK { 14namespace Vulkan::MaxwellToVK {
15 15
16using Maxwell = Tegra::Engines::Maxwell3D::Regs;
17
16namespace Sampler { 18namespace Sampler {
17 19
18vk::Filter Filter(Tegra::Texture::TextureFilter filter) { 20vk::Filter Filter(Tegra::Texture::TextureFilter filter) {
@@ -196,17 +198,17 @@ std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType for
196 return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable}; 198 return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable};
197} 199}
198 200
199vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage) { 201vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) {
200 switch (stage) { 202 switch (stage) {
201 case Maxwell::ShaderStage::Vertex: 203 case Tegra::Engines::ShaderType::Vertex:
202 return vk::ShaderStageFlagBits::eVertex; 204 return vk::ShaderStageFlagBits::eVertex;
203 case Maxwell::ShaderStage::TesselationControl: 205 case Tegra::Engines::ShaderType::TesselationControl:
204 return vk::ShaderStageFlagBits::eTessellationControl; 206 return vk::ShaderStageFlagBits::eTessellationControl;
205 case Maxwell::ShaderStage::TesselationEval: 207 case Tegra::Engines::ShaderType::TesselationEval:
206 return vk::ShaderStageFlagBits::eTessellationEvaluation; 208 return vk::ShaderStageFlagBits::eTessellationEvaluation;
207 case Maxwell::ShaderStage::Geometry: 209 case Tegra::Engines::ShaderType::Geometry:
208 return vk::ShaderStageFlagBits::eGeometry; 210 return vk::ShaderStageFlagBits::eGeometry;
209 case Maxwell::ShaderStage::Fragment: 211 case Tegra::Engines::ShaderType::Fragment:
210 return vk::ShaderStageFlagBits::eFragment; 212 return vk::ShaderStageFlagBits::eFragment;
211 } 213 }
212 UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage)); 214 UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage));
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 5b0ffd87a..904a32e01 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -32,7 +32,7 @@ vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compar
32std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type, 32std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
33 PixelFormat pixel_format); 33 PixelFormat pixel_format);
34 34
35vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage); 35vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage);
36 36
37vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology); 37vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology);
38 38
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 2850d5b59..80738d3d0 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -17,6 +17,7 @@
17#include "video_core/engines/maxwell_3d.h" 17#include "video_core/engines/maxwell_3d.h"
18#include "video_core/engines/shader_bytecode.h" 18#include "video_core/engines/shader_bytecode.h"
19#include "video_core/engines/shader_header.h" 19#include "video_core/engines/shader_header.h"
20#include "video_core/engines/shader_type.h"
20#include "video_core/renderer_vulkan/vk_device.h" 21#include "video_core/renderer_vulkan/vk_device.h"
21#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 22#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
22#include "video_core/shader/node.h" 23#include "video_core/shader/node.h"
@@ -25,13 +26,13 @@
25namespace Vulkan::VKShader { 26namespace Vulkan::VKShader {
26 27
27using Sirit::Id; 28using Sirit::Id;
29using Tegra::Engines::ShaderType;
28using Tegra::Shader::Attribute; 30using Tegra::Shader::Attribute;
29using Tegra::Shader::AttributeUse; 31using Tegra::Shader::AttributeUse;
30using Tegra::Shader::Register; 32using Tegra::Shader::Register;
31using namespace VideoCommon::Shader; 33using namespace VideoCommon::Shader;
32 34
33using Maxwell = Tegra::Engines::Maxwell3D::Regs; 35using Maxwell = Tegra::Engines::Maxwell3D::Regs;
34using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
35using Operation = const OperationNode&; 36using Operation = const OperationNode&;
36 37
37// TODO(Rodrigo): Use rasterizer's value 38// TODO(Rodrigo): Use rasterizer's value
@@ -93,7 +94,7 @@ class ExprDecompiler;
93 94
94class SPIRVDecompiler : public Sirit::Module { 95class SPIRVDecompiler : public Sirit::Module {
95public: 96public:
96 explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderStage stage) 97 explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage)
97 : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()} { 98 : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()} {
98 AddCapability(spv::Capability::Shader); 99 AddCapability(spv::Capability::Shader);
99 AddExtension("SPV_KHR_storage_buffer_storage_class"); 100 AddExtension("SPV_KHR_storage_buffer_storage_class");
@@ -256,21 +257,21 @@ private:
256 } 257 }
257 258
258 void DeclareVertex() { 259 void DeclareVertex() {
259 if (stage != ShaderStage::Vertex) 260 if (stage != ShaderType::Vertex)
260 return; 261 return;
261 262
262 DeclareVertexRedeclarations(); 263 DeclareVertexRedeclarations();
263 } 264 }
264 265
265 void DeclareGeometry() { 266 void DeclareGeometry() {
266 if (stage != ShaderStage::Geometry) 267 if (stage != ShaderType::Geometry)
267 return; 268 return;
268 269
269 UNIMPLEMENTED(); 270 UNIMPLEMENTED();
270 } 271 }
271 272
272 void DeclareFragment() { 273 void DeclareFragment() {
273 if (stage != ShaderStage::Fragment) 274 if (stage != ShaderType::Fragment)
274 return; 275 return;
275 276
276 for (u32 rt = 0; rt < static_cast<u32>(frag_colors.size()); ++rt) { 277 for (u32 rt = 0; rt < static_cast<u32>(frag_colors.size()); ++rt) {
@@ -354,7 +355,7 @@ private:
354 continue; 355 continue;
355 } 356 }
356 357
357 UNIMPLEMENTED_IF(stage == ShaderStage::Geometry); 358 UNIMPLEMENTED_IF(stage == ShaderType::Geometry);
358 359
359 const u32 location = GetGenericAttributeLocation(index); 360 const u32 location = GetGenericAttributeLocation(index);
360 const Id id = OpVariable(t_in_float4, spv::StorageClass::Input); 361 const Id id = OpVariable(t_in_float4, spv::StorageClass::Input);
@@ -364,7 +365,7 @@ private:
364 365
365 Decorate(id, spv::Decoration::Location, location); 366 Decorate(id, spv::Decoration::Location, location);
366 367
367 if (stage != ShaderStage::Fragment) { 368 if (stage != ShaderType::Fragment) {
368 continue; 369 continue;
369 } 370 }
370 switch (header.ps.GetAttributeUse(location)) { 371 switch (header.ps.GetAttributeUse(location)) {
@@ -548,7 +549,7 @@ private:
548 549
549 switch (attribute) { 550 switch (attribute) {
550 case Attribute::Index::Position: 551 case Attribute::Index::Position:
551 if (stage != ShaderStage::Fragment) { 552 if (stage != ShaderType::Fragment) {
552 UNIMPLEMENTED(); 553 UNIMPLEMENTED();
553 break; 554 break;
554 } else { 555 } else {
@@ -561,7 +562,7 @@ private:
561 // TODO(Subv): Find out what the values are for the first two elements when inside a 562 // TODO(Subv): Find out what the values are for the first two elements when inside a
562 // vertex shader, and what's the value of the fourth element when inside a Tess Eval 563 // vertex shader, and what's the value of the fourth element when inside a Tess Eval
563 // shader. 564 // shader.
564 ASSERT(stage == ShaderStage::Vertex); 565 ASSERT(stage == ShaderType::Vertex);
565 switch (element) { 566 switch (element) {
566 case 2: 567 case 2:
567 return BitcastFrom<Type::Uint>(Emit(OpLoad(t_uint, instance_index))); 568 return BitcastFrom<Type::Uint>(Emit(OpLoad(t_uint, instance_index)));
@@ -572,7 +573,7 @@ private:
572 return Constant(t_float, 0); 573 return Constant(t_float, 0);
573 case Attribute::Index::FrontFacing: 574 case Attribute::Index::FrontFacing:
574 // TODO(Subv): Find out what the values are for the other elements. 575 // TODO(Subv): Find out what the values are for the other elements.
575 ASSERT(stage == ShaderStage::Fragment); 576 ASSERT(stage == ShaderType::Fragment);
576 if (element == 3) { 577 if (element == 3) {
577 const Id is_front_facing = Emit(OpLoad(t_bool, front_facing)); 578 const Id is_front_facing = Emit(OpLoad(t_bool, front_facing));
578 const Id true_value = 579 const Id true_value =
@@ -1075,7 +1076,7 @@ private:
1075 1076
1076 Id PreExit() { 1077 Id PreExit() {
1077 switch (stage) { 1078 switch (stage) {
1078 case ShaderStage::Vertex: { 1079 case ShaderType::Vertex: {
1079 // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't 1080 // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't
1080 // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it. 1081 // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it.
1081 const Id z_pointer = AccessElement(t_out_float, per_vertex, position_index, 2u); 1082 const Id z_pointer = AccessElement(t_out_float, per_vertex, position_index, 2u);
@@ -1085,7 +1086,7 @@ private:
1085 Emit(OpStore(z_pointer, depth)); 1086 Emit(OpStore(z_pointer, depth));
1086 break; 1087 break;
1087 } 1088 }
1088 case ShaderStage::Fragment: { 1089 case ShaderType::Fragment: {
1089 const auto SafeGetRegister = [&](u32 reg) { 1090 const auto SafeGetRegister = [&](u32 reg) {
1090 // TODO(Rodrigo): Replace with contains once C++20 releases 1091 // TODO(Rodrigo): Replace with contains once C++20 releases
1091 if (const auto it = registers.find(reg); it != registers.end()) { 1092 if (const auto it = registers.find(reg); it != registers.end()) {
@@ -1511,7 +1512,7 @@ private:
1511 1512
1512 const VKDevice& device; 1513 const VKDevice& device;
1513 const ShaderIR& ir; 1514 const ShaderIR& ir;
1514 const ShaderStage stage; 1515 const ShaderType stage;
1515 const Tegra::Shader::Header header; 1516 const Tegra::Shader::Header header;
1516 u64 conditional_nest_count{}; 1517 u64 conditional_nest_count{};
1517 u64 inside_branch{}; 1518 u64 inside_branch{};
@@ -1843,7 +1844,7 @@ void SPIRVDecompiler::DecompileAST() {
1843} 1844}
1844 1845
1845DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, 1846DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
1846 Maxwell::ShaderStage stage) { 1847 ShaderType stage) {
1847 auto decompiler = std::make_unique<SPIRVDecompiler>(device, ir, stage); 1848 auto decompiler = std::make_unique<SPIRVDecompiler>(device, ir, stage);
1848 decompiler->Decompile(); 1849 decompiler->Decompile();
1849 return {std::move(decompiler), decompiler->GetShaderEntries()}; 1850 return {std::move(decompiler), decompiler->GetShaderEntries()};
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index f90541cc1..203fc00d0 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -79,6 +79,6 @@ struct ShaderEntries {
79using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>; 79using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>;
80 80
81DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, 81DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
82 Maxwell::ShaderStage stage); 82 Tegra::Engines::ShaderType stage);
83 83
84} // namespace Vulkan::VKShader 84} // namespace Vulkan::VKShader
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp
index fe467608e..b65399f91 100644
--- a/src/video_core/shader/const_buffer_locker.cpp
+++ b/src/video_core/shader/const_buffer_locker.cpp
@@ -9,6 +9,7 @@
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/engines/maxwell_3d.h" 11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/engines/shader_type.h"
12#include "video_core/shader/const_buffer_locker.h" 13#include "video_core/shader/const_buffer_locker.h"
13 14
14namespace VideoCommon::Shader { 15namespace VideoCommon::Shader {
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
index 600e2f3c3..50a8ce42a 100644
--- a/src/video_core/shader/const_buffer_locker.h
+++ b/src/video_core/shader/const_buffer_locker.h
@@ -8,6 +8,7 @@
8#include "common/common_types.h" 8#include "common/common_types.h"
9#include "common/hash.h" 9#include "common/hash.h"
10#include "video_core/engines/const_buffer_engine_interface.h" 10#include "video_core/engines/const_buffer_engine_interface.h"
11#include "video_core/engines/shader_type.h"
11 12
12namespace VideoCommon::Shader { 13namespace VideoCommon::Shader {
13 14
@@ -20,7 +21,7 @@ using BindlessSamplerMap =
20 * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader 21 * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader
21 * compiler. with it, the shader can obtain required data from GPU state and store it for disk 22 * compiler. with it, the shader can obtain required data from GPU state and store it for disk
22 * shader compilation. 23 * shader compilation.
23 **/ 24 */
24class ConstBufferLocker { 25class ConstBufferLocker {
25public: 26public:
26 explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); 27 explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage);
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index bb926a132..b094e5a06 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -128,8 +128,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
128 } 128 }
129 const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); 129 const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
130 130
131 const auto& sampler = 131 const SamplerInfo info{TextureType::Texture2D, false, depth_compare};
132 GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}}); 132 const auto& sampler = GetSampler(instr.sampler, info);
133 133
134 Node4 values; 134 Node4 values;
135 for (u32 element = 0; element < values.size(); ++element) { 135 for (u32 element = 0; element < values.size(); ++element) {
@@ -149,7 +149,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
149 // Sadly, not all texture instructions specify the type of texture their sampler 149 // Sadly, not all texture instructions specify the type of texture their sampler
150 // uses. This must be fixed at a later instance. 150 // uses. This must be fixed at a later instance.
151 const auto& sampler = 151 const auto& sampler =
152 is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {}); 152 is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler);
153 153
154 u32 indexer = 0; 154 u32 indexer = 0;
155 switch (instr.txq.query_type) { 155 switch (instr.txq.query_type) {
@@ -185,8 +185,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
185 auto texture_type = instr.tmml.texture_type.Value(); 185 auto texture_type = instr.tmml.texture_type.Value();
186 const bool is_array = instr.tmml.array != 0; 186 const bool is_array = instr.tmml.array != 0;
187 const auto& sampler = 187 const auto& sampler =
188 is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}}) 188 is_bindless ? GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler);
189 : GetSampler(instr.sampler, {{texture_type, is_array, false}});
190 189
191 std::vector<Node> coords; 190 std::vector<Node> coords;
192 191
@@ -254,67 +253,50 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
254 return pc; 253 return pc;
255} 254}
256 255
257const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, 256ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sampler_info, u32 offset,
258 std::optional<SamplerInfo> sampler_info) { 257 std::optional<u32> buffer) {
259 const auto offset = static_cast<u32>(sampler.index.Value());
260
261 TextureType type;
262 bool is_array;
263 bool is_shadow;
264 if (sampler_info) { 258 if (sampler_info) {
265 type = sampler_info->type; 259 return *sampler_info;
266 is_array = sampler_info->is_array; 260 }
267 is_shadow = sampler_info->is_shadow; 261 const auto sampler =
268 } else if (const auto sampler = locker.ObtainBoundSampler(offset)) { 262 buffer ? locker.ObtainBindlessSampler(*buffer, offset) : locker.ObtainBoundSampler(offset);
269 type = sampler->texture_type.Value(); 263 if (!sampler) {
270 is_array = sampler->is_array.Value() != 0;
271 is_shadow = sampler->is_shadow.Value() != 0;
272 } else {
273 LOG_WARNING(HW_GPU, "Unknown sampler info"); 264 LOG_WARNING(HW_GPU, "Unknown sampler info");
274 type = TextureType::Texture2D; 265 return SamplerInfo{TextureType::Texture2D, false, false, false};
275 is_array = false;
276 is_shadow = false;
277 } 266 }
267 return SamplerInfo{sampler->texture_type, sampler->is_array != 0, sampler->is_shadow != 0,
268 sampler->is_buffer != 0};
269}
270
271const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
272 std::optional<SamplerInfo> sampler_info) {
273 const auto offset = static_cast<u32>(sampler.index.Value());
274 const auto info = GetSamplerInfo(sampler_info, offset);
278 275
279 // If this sampler has already been used, return the existing mapping. 276 // If this sampler has already been used, return the existing mapping.
280 const auto it = 277 const auto it =
281 std::find_if(used_samplers.begin(), used_samplers.end(), 278 std::find_if(used_samplers.begin(), used_samplers.end(),
282 [offset](const Sampler& entry) { return entry.GetOffset() == offset; }); 279 [offset](const Sampler& entry) { return entry.GetOffset() == offset; });
283 if (it != used_samplers.end()) { 280 if (it != used_samplers.end()) {
284 ASSERT(!it->IsBindless() && it->GetType() == type && it->IsArray() == is_array && 281 ASSERT(!it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&
285 it->IsShadow() == is_shadow); 282 it->IsShadow() == info.is_shadow && it->IsBuffer() == info.is_buffer);
286 return *it; 283 return *it;
287 } 284 }
288 285
289 // Otherwise create a new mapping for this sampler 286 // Otherwise create a new mapping for this sampler
290 const auto next_index = static_cast<u32>(used_samplers.size()); 287 const auto next_index = static_cast<u32>(used_samplers.size());
291 return used_samplers.emplace_back(Sampler(next_index, offset, type, is_array, is_shadow)); 288 return used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow,
289 info.is_buffer);
292} 290}
293 291
294const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, 292const Sampler& ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
295 std::optional<SamplerInfo> sampler_info) { 293 std::optional<SamplerInfo> sampler_info) {
296 const Node sampler_register = GetRegister(reg); 294 const Node sampler_register = GetRegister(reg);
297 const auto [base_sampler, buffer, offset] = 295 const auto [base_sampler, buffer, offset] =
298 TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); 296 TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
299 ASSERT(base_sampler != nullptr); 297 ASSERT(base_sampler != nullptr);
300 298
301 TextureType type; 299 const auto info = GetSamplerInfo(sampler_info, offset, buffer);
302 bool is_array;
303 bool is_shadow;
304 if (sampler_info) {
305 type = sampler_info->type;
306 is_array = sampler_info->is_array;
307 is_shadow = sampler_info->is_shadow;
308 } else if (const auto sampler = locker.ObtainBindlessSampler(buffer, offset)) {
309 type = sampler->texture_type.Value();
310 is_array = sampler->is_array.Value() != 0;
311 is_shadow = sampler->is_shadow.Value() != 0;
312 } else {
313 LOG_WARNING(HW_GPU, "Unknown sampler info");
314 type = TextureType::Texture2D;
315 is_array = false;
316 is_shadow = false;
317 }
318 300
319 // If this sampler has already been used, return the existing mapping. 301 // If this sampler has already been used, return the existing mapping.
320 const auto it = 302 const auto it =
@@ -323,15 +305,15 @@ const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg,
323 return entry.GetBuffer() == buffer && entry.GetOffset() == offset; 305 return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
324 }); 306 });
325 if (it != used_samplers.end()) { 307 if (it != used_samplers.end()) {
326 ASSERT(it->IsBindless() && it->GetType() == type && it->IsArray() == is_array && 308 ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&
327 it->IsShadow() == is_shadow); 309 it->IsShadow() == info.is_shadow);
328 return *it; 310 return *it;
329 } 311 }
330 312
331 // Otherwise create a new mapping for this sampler 313 // Otherwise create a new mapping for this sampler
332 const auto next_index = static_cast<u32>(used_samplers.size()); 314 const auto next_index = static_cast<u32>(used_samplers.size());
333 return used_samplers.emplace_back( 315 return used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array,
334 Sampler(next_index, offset, buffer, type, is_array, is_shadow)); 316 info.is_shadow, info.is_buffer);
335} 317}
336 318
337void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { 319void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
@@ -416,17 +398,16 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
416 (texture_type == TextureType::TextureCube && is_array && is_shadow), 398 (texture_type == TextureType::TextureCube && is_array && is_shadow),
417 "This method is not supported."); 399 "This method is not supported.");
418 400
401 const SamplerInfo info{texture_type, is_array, is_shadow, false};
419 const auto& sampler = 402 const auto& sampler =
420 is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}}) 403 is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info);
421 : GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}});
422 404
423 const bool lod_needed = process_mode == TextureProcessMode::LZ || 405 const bool lod_needed = process_mode == TextureProcessMode::LZ ||
424 process_mode == TextureProcessMode::LL || 406 process_mode == TextureProcessMode::LL ||
425 process_mode == TextureProcessMode::LLA; 407 process_mode == TextureProcessMode::LLA;
426 408
427 // LOD selection (either via bias or explicit textureLod) not 409 // LOD selection (either via bias or explicit textureLod) not supported in GL for
428 // supported in GL for sampler2DArrayShadow and 410 // sampler2DArrayShadow and samplerCubeArrayShadow.
429 // samplerCubeArrayShadow.
430 const bool gl_lod_supported = 411 const bool gl_lod_supported =
431 !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || 412 !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
432 (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); 413 (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
@@ -436,8 +417,8 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
436 417
437 UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); 418 UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
438 419
439 Node bias = {}; 420 Node bias;
440 Node lod = {}; 421 Node lod;
441 if (process_mode != TextureProcessMode::None && gl_lod_supported) { 422 if (process_mode != TextureProcessMode::None && gl_lod_supported) {
442 switch (process_mode) { 423 switch (process_mode) {
443 case TextureProcessMode::LZ: 424 case TextureProcessMode::LZ:
@@ -573,10 +554,9 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
573 554
574 u64 parameter_register = instr.gpr20.Value(); 555 u64 parameter_register = instr.gpr20.Value();
575 556
576 const auto& sampler = 557 const SamplerInfo info{texture_type, is_array, depth_compare, false};
577 is_bindless 558 const auto& sampler = is_bindless ? GetBindlessSampler(parameter_register++, info)
578 ? GetBindlessSampler(parameter_register++, {{texture_type, is_array, depth_compare}}) 559 : GetSampler(instr.sampler, info);
579 : GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}});
580 560
581 std::vector<Node> aoffi; 561 std::vector<Node> aoffi;
582 if (is_aoffi) { 562 if (is_aoffi) {
@@ -623,7 +603,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
623 // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; 603 // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
624 // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; 604 // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
625 605
626 const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); 606 const auto& sampler = GetSampler(instr.sampler);
627 607
628 Node4 values; 608 Node4 values;
629 for (u32 element = 0; element < values.size(); ++element) { 609 for (u32 element = 0; element < values.size(); ++element) {
@@ -636,6 +616,8 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
636} 616}
637 617
638Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { 618Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
619 const auto& sampler = GetSampler(instr.sampler);
620
639 const std::size_t type_coord_count = GetCoordCount(texture_type); 621 const std::size_t type_coord_count = GetCoordCount(texture_type);
640 const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; 622 const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
641 623
@@ -659,7 +641,14 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
659 // When lod is used always is in gpr20 641 // When lod is used always is in gpr20
660 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); 642 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
661 643
662 const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); 644 // Fill empty entries from the guest sampler.
645 const std::size_t entry_coord_count = GetCoordCount(sampler.GetType());
646 if (type_coord_count != entry_coord_count) {
647 LOG_WARNING(HW_GPU, "Bound and built texture types mismatch");
648 }
649 for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) {
650 coords.push_back(GetRegister(Register::ZeroIndex));
651 }
663 652
664 Node4 values; 653 Node4 values;
665 for (u32 element = 0; element < values.size(); ++element) { 654 for (u32 element = 0; element < values.size(); ++element) {
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 54217e6a4..44d85d434 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -225,14 +225,15 @@ class Sampler {
225public: 225public:
226 /// This constructor is for bound samplers 226 /// This constructor is for bound samplers
227 constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type, 227 constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type,
228 bool is_array, bool is_shadow) 228 bool is_array, bool is_shadow, bool is_buffer)
229 : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow} {} 229 : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
230 is_buffer{is_buffer} {}
230 231
231 /// This constructor is for bindless samplers 232 /// This constructor is for bindless samplers
232 constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, 233 constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
233 bool is_array, bool is_shadow) 234 bool is_array, bool is_shadow, bool is_buffer)
234 : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, 235 : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
235 is_shadow{is_shadow}, is_bindless{true} {} 236 is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true} {}
236 237
237 constexpr u32 GetIndex() const { 238 constexpr u32 GetIndex() const {
238 return index; 239 return index;
@@ -258,6 +259,10 @@ public:
258 return is_shadow; 259 return is_shadow;
259 } 260 }
260 261
262 constexpr bool IsBuffer() const {
263 return is_buffer;
264 }
265
261 constexpr bool IsBindless() const { 266 constexpr bool IsBindless() const {
262 return is_bindless; 267 return is_bindless;
263 } 268 }
@@ -270,6 +275,7 @@ private:
270 Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) 275 Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
271 bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. 276 bool is_array{}; ///< Whether the texture is being sampled as an array texture or not.
272 bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. 277 bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
278 bool is_buffer{}; ///< Whether the texture is a texture buffer without sampler.
273 bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. 279 bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
274}; 280};
275 281
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 76a849818..2f71a50d2 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -179,6 +179,7 @@ private:
179 Tegra::Shader::TextureType type; 179 Tegra::Shader::TextureType type;
180 bool is_array; 180 bool is_array;
181 bool is_shadow; 181 bool is_shadow;
182 bool is_buffer;
182 }; 183 };
183 184
184 void Decode(); 185 void Decode();
@@ -303,13 +304,17 @@ private:
303 /// Returns a predicate combiner operation 304 /// Returns a predicate combiner operation
304 OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); 305 OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
305 306
307 /// Queries the missing sampler info from the execution context.
308 SamplerInfo GetSamplerInfo(std::optional<SamplerInfo> sampler_info, u32 offset,
309 std::optional<u32> buffer = std::nullopt);
310
306 /// Accesses a texture sampler 311 /// Accesses a texture sampler
307 const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, 312 const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
308 std::optional<SamplerInfo> sampler_info); 313 std::optional<SamplerInfo> sampler_info = std::nullopt);
309 314
310 // Accesses a texture sampler for a bindless texture. 315 /// Accesses a texture sampler for a bindless texture.
311 const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, 316 const Sampler& GetBindlessSampler(Tegra::Shader::Register reg,
312 std::optional<SamplerInfo> sampler_info); 317 std::optional<SamplerInfo> sampler_info = std::nullopt);
313 318
314 /// Accesses an image. 319 /// Accesses an image.
315 Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); 320 Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);