summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/video_core/CMakeLists.txt1
-rw-r--r--src/video_core/engines/shader_bytecode.h8
-rw-r--r--src/video_core/gpu.cpp183
-rw-r--r--src/video_core/gpu.h71
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.cpp70
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.h18
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp116
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h31
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp140
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h92
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp53
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h36
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp13
-rw-r--r--src/video_core/shader/decode/memory.cpp49
-rw-r--r--src/video_core/shader/shader_ir.h38
-rw-r--r--src/video_core/shader/track.cpp76
16 files changed, 768 insertions, 227 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 509ca117a..6113e17ff 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -87,6 +87,7 @@ add_library(video_core STATIC
87 shader/decode.cpp 87 shader/decode.cpp
88 shader/shader_ir.cpp 88 shader/shader_ir.cpp
89 shader/shader_ir.h 89 shader/shader_ir.h
90 shader/track.cpp
90 surface.cpp 91 surface.cpp
91 surface.h 92 surface.h
92 textures/astc.cpp 93 textures/astc.cpp
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index cdef97bc6..9989825f8 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -208,6 +208,8 @@ enum class UniformType : u64 {
208 SignedShort = 3, 208 SignedShort = 3,
209 Single = 4, 209 Single = 4,
210 Double = 5, 210 Double = 5,
211 Quad = 6,
212 UnsignedQuad = 7,
211}; 213};
212 214
213enum class StoreType : u64 { 215enum class StoreType : u64 {
@@ -785,6 +787,12 @@ union Instruction {
785 } st_l; 787 } st_l;
786 788
787 union { 789 union {
790 BitField<48, 3, UniformType> type;
791 BitField<46, 2, u64> cache_mode;
792 BitField<20, 24, s64> immediate_offset;
793 } ldg;
794
795 union {
788 BitField<0, 3, u64> pred0; 796 BitField<0, 3, u64> pred0;
789 BitField<3, 3, u64> pred3; 797 BitField<3, 3, u64> pred3;
790 BitField<7, 1, u64> abs_a; 798 BitField<7, 1, u64> abs_a;
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 08cf6268f..d3d32a359 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -3,6 +3,8 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "core/core_timing.h"
7#include "core/memory.h"
6#include "video_core/engines/fermi_2d.h" 8#include "video_core/engines/fermi_2d.h"
7#include "video_core/engines/kepler_memory.h" 9#include "video_core/engines/kepler_memory.h"
8#include "video_core/engines/maxwell_3d.h" 10#include "video_core/engines/maxwell_3d.h"
@@ -124,9 +126,36 @@ u32 DepthFormatBytesPerPixel(DepthFormat format) {
124 } 126 }
125} 127}
126 128
129// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
130// their numbers are written down multiplied by 4 in the docs. Here we do not multiply by 4,
131// so the values you see in the docs may be these values multiplied by 4.
127enum class BufferMethods { 132enum class BufferMethods {
128 BindObject = 0, 133 BindObject = 0x0,
129 CountBufferMethods = 0x40, 134 Nop = 0x2,
135 SemaphoreAddressHigh = 0x4,
136 SemaphoreAddressLow = 0x5,
137 SemaphoreSequence = 0x6,
138 SemaphoreTrigger = 0x7,
139 NotifyIntr = 0x8,
140 WrcacheFlush = 0x9,
141 Unk28 = 0xA,
142 Unk2c = 0xB,
143 RefCnt = 0x14,
144 SemaphoreAcquire = 0x1A,
145 SemaphoreRelease = 0x1B,
146 Unk70 = 0x1C,
147 Unk74 = 0x1D,
148 Unk78 = 0x1E,
149 Unk7c = 0x1F,
150 Yield = 0x20,
151 NonPullerMethods = 0x40,
152};
153
154enum class GpuSemaphoreOperation {
155 AcquireEqual = 0x1,
156 WriteLong = 0x2,
157 AcquireGequal = 0x4,
158 AcquireMask = 0x8,
130}; 159};
131 160
132void GPU::CallMethod(const MethodCall& method_call) { 161void GPU::CallMethod(const MethodCall& method_call) {
@@ -135,20 +164,78 @@ void GPU::CallMethod(const MethodCall& method_call) {
135 164
136 ASSERT(method_call.subchannel < bound_engines.size()); 165 ASSERT(method_call.subchannel < bound_engines.size());
137 166
138 if (method_call.method == static_cast<u32>(BufferMethods::BindObject)) { 167 if (ExecuteMethodOnEngine(method_call)) {
139 // Bind the current subchannel to the desired engine id. 168 CallEngineMethod(method_call);
140 LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, 169 } else {
141 method_call.argument); 170 CallPullerMethod(method_call);
142 bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument);
143 return;
144 } 171 }
172}
173
174bool GPU::ExecuteMethodOnEngine(const MethodCall& method_call) {
175 const auto method = static_cast<BufferMethods>(method_call.method);
176 return method >= BufferMethods::NonPullerMethods;
177}
145 178
146 if (method_call.method < static_cast<u32>(BufferMethods::CountBufferMethods)) { 179void GPU::CallPullerMethod(const MethodCall& method_call) {
147 // TODO(Subv): Research and implement these methods. 180 regs.reg_array[method_call.method] = method_call.argument;
148 LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented"); 181 const auto method = static_cast<BufferMethods>(method_call.method);
149 return; 182
183 switch (method) {
184 case BufferMethods::BindObject: {
185 ProcessBindMethod(method_call);
186 break;
187 }
188 case BufferMethods::Nop:
189 case BufferMethods::SemaphoreAddressHigh:
190 case BufferMethods::SemaphoreAddressLow:
191 case BufferMethods::SemaphoreSequence:
192 case BufferMethods::RefCnt:
193 break;
194 case BufferMethods::SemaphoreTrigger: {
195 ProcessSemaphoreTriggerMethod();
196 break;
197 }
198 case BufferMethods::NotifyIntr: {
199 // TODO(Kmather73): Research and implement this method.
200 LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
201 break;
202 }
203 case BufferMethods::WrcacheFlush: {
204 // TODO(Kmather73): Research and implement this method.
205 LOG_ERROR(HW_GPU, "Special puller engine method WrcacheFlush not implemented");
206 break;
207 }
208 case BufferMethods::Unk28: {
209 // TODO(Kmather73): Research and implement this method.
210 LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
211 break;
212 }
213 case BufferMethods::Unk2c: {
214 // TODO(Kmather73): Research and implement this method.
215 LOG_ERROR(HW_GPU, "Special puller engine method Unk2c not implemented");
216 break;
217 }
218 case BufferMethods::SemaphoreAcquire: {
219 ProcessSemaphoreAcquire();
220 break;
150 } 221 }
222 case BufferMethods::SemaphoreRelease: {
223 ProcessSemaphoreRelease();
224 break;
225 }
226 case BufferMethods::Yield: {
227 // TODO(Kmather73): Research and implement this method.
228 LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
229 break;
230 }
231 default:
232 LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented",
233 static_cast<u32>(method));
234 break;
235 }
236}
151 237
238void GPU::CallEngineMethod(const MethodCall& method_call) {
152 const EngineID engine = bound_engines[method_call.subchannel]; 239 const EngineID engine = bound_engines[method_call.subchannel];
153 240
154 switch (engine) { 241 switch (engine) {
@@ -172,4 +259,76 @@ void GPU::CallMethod(const MethodCall& method_call) {
172 } 259 }
173} 260}
174 261
262void GPU::ProcessBindMethod(const MethodCall& method_call) {
263 // Bind the current subchannel to the desired engine id.
264 LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
265 method_call.argument);
266 bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument);
267}
268
269void GPU::ProcessSemaphoreTriggerMethod() {
270 const auto semaphoreOperationMask = 0xF;
271 const auto op =
272 static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
273 if (op == GpuSemaphoreOperation::WriteLong) {
274 auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
275 struct Block {
276 u32 sequence;
277 u32 zeros = 0;
278 u64 timestamp;
279 };
280
281 Block block{};
282 block.sequence = regs.semaphore_sequence;
283 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
284 // CoreTiming
285 block.timestamp = CoreTiming::GetTicks();
286 Memory::WriteBlock(*address, &block, sizeof(block));
287 } else {
288 const auto address =
289 memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
290 const u32 word = Memory::Read32(*address);
291 if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
292 (op == GpuSemaphoreOperation::AcquireGequal &&
293 static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
294 (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
295 // Nothing to do in this case
296 } else {
297 regs.acquire_source = true;
298 regs.acquire_value = regs.semaphore_sequence;
299 if (op == GpuSemaphoreOperation::AcquireEqual) {
300 regs.acquire_active = true;
301 regs.acquire_mode = false;
302 } else if (op == GpuSemaphoreOperation::AcquireGequal) {
303 regs.acquire_active = true;
304 regs.acquire_mode = true;
305 } else if (op == GpuSemaphoreOperation::AcquireMask) {
306 // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
307 // semaphore_sequence, gives a non-0 result
308 LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
309 } else {
310 LOG_ERROR(HW_GPU, "Invalid semaphore operation");
311 }
312 }
313 }
314}
315
316void GPU::ProcessSemaphoreRelease() {
317 const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
318 Memory::Write32(*address, regs.semaphore_release);
319}
320
321void GPU::ProcessSemaphoreAcquire() {
322 const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
323 const u32 word = Memory::Read32(*address);
324 const auto value = regs.semaphore_acquire;
325 if (word != value) {
326 regs.acquire_active = true;
327 regs.acquire_value = value;
328 // TODO(kemathe73) figure out how to do the acquire_timeout
329 regs.acquire_mode = false;
330 regs.acquire_source = false;
331 }
332}
333
175} // namespace Tegra 334} // namespace Tegra
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index af5ccd1e9..fb8975811 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -156,6 +156,46 @@ public:
156 /// Returns a const reference to the GPU DMA pusher. 156 /// Returns a const reference to the GPU DMA pusher.
157 const Tegra::DmaPusher& DmaPusher() const; 157 const Tegra::DmaPusher& DmaPusher() const;
158 158
// Register file of the GPU puller. The named fields and reg_array alias the same storage, so
// a write through reg_array[method] updates the matching named field. Word offsets of the
// named fields are checked by the ASSERT_REG_POSITION static_asserts after the class.
159 struct Regs {
// Number of 32-bit words exposed through reg_array
160 static constexpr size_t NUM_REGS = 0x100;
161
162 union {
163 struct {
164 INSERT_PADDING_WORDS(0x4);
// Words 0x4-0x5: high and low halves of the semaphore GPU virtual address
165 struct {
166 u32 address_high;
167 u32 address_low;
168
// Combines both halves into one address: address_high supplies bits 32 and up
169 GPUVAddr SmaphoreAddress() const {
170 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
171 address_low);
172 }
173 } smaphore_address;
174
// Words 0x6 and 0x7
175 u32 semaphore_sequence;
176 u32 semaphore_trigger;
177 INSERT_PADDING_WORDS(0xC);
178
179 // The pusher and the puller share the reference counter, the pusher only has read
180 // access
// Word 0x14
181 u32 reference_count;
182 INSERT_PADDING_WORDS(0x5);
183
// Words 0x1A and 0x1B
184 u32 semaphore_acquire;
185 u32 semaphore_release;
186 INSERT_PADDING_WORDS(0xE4);
187
188 // Puller state
// These fields start at word 0x100, i.e. past the NUM_REGS entries of reg_array, so they
// are not reachable through reg_array.
189 u32 acquire_mode;
190 u32 acquire_source;
191 u32 acquire_active;
192 u32 acquire_timeout;
193 u32 acquire_value;
194 };
195 std::array<u32, NUM_REGS> reg_array;
196 };
197 } regs{};
198
159private: 199private:
160 std::unique_ptr<Tegra::DmaPusher> dma_pusher; 200 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
161 std::unique_ptr<Tegra::MemoryManager> memory_manager; 201 std::unique_ptr<Tegra::MemoryManager> memory_manager;
@@ -173,6 +213,37 @@ private:
173 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; 213 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
174 /// Inline memory engine 214 /// Inline memory engine
175 std::unique_ptr<Engines::KeplerMemory> kepler_memory; 215 std::unique_ptr<Engines::KeplerMemory> kepler_memory;
216
217 void ProcessBindMethod(const MethodCall& method_call);
218 void ProcessSemaphoreTriggerMethod();
219 void ProcessSemaphoreRelease();
220 void ProcessSemaphoreAcquire();
221
222 // Calls a GPU puller method.
223 void CallPullerMethod(const MethodCall& method_call);
224 // Calls a GPU engine method.
225 void CallEngineMethod(const MethodCall& method_call);
226 // Determines where the method should be executed.
227 bool ExecuteMethodOnEngine(const MethodCall& method_call);
176}; 228};
177 229
// Compile-time layout check for GPU::Regs: fails the build when field_name does not start at
// byte offset position * 4, i.e. at the 32-bit word index given by position.
230#define ASSERT_REG_POSITION(field_name, position) \
231 static_assert(offsetof(GPU::Regs, field_name) == position * 4, \
232 "Field " #field_name " has invalid position")
233
// Fields that overlap reg_array
234ASSERT_REG_POSITION(smaphore_address, 0x4);
235ASSERT_REG_POSITION(semaphore_sequence, 0x6);
236ASSERT_REG_POSITION(semaphore_trigger, 0x7);
237ASSERT_REG_POSITION(reference_count, 0x14);
238ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
239ASSERT_REG_POSITION(semaphore_release, 0x1B);
240
// Puller state fields placed from word 0x100 onwards
241ASSERT_REG_POSITION(acquire_mode, 0x100);
242ASSERT_REG_POSITION(acquire_source, 0x101);
243ASSERT_REG_POSITION(acquire_active, 0x102);
244ASSERT_REG_POSITION(acquire_timeout, 0x103);
245ASSERT_REG_POSITION(acquire_value, 0x104);
246
// The helper is only needed for the checks above
247#undef ASSERT_REG_POSITION
248
178} // namespace Tegra 249} // namespace Tegra
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index 7992b82c4..c7f32feaa 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -4,8 +4,13 @@
4 4
5#include <glad/glad.h> 5#include <glad/glad.h>
6 6
7#include "common/assert.h"
8#include "common/logging/log.h"
9#include "core/core.h"
10#include "core/memory.h"
7#include "video_core/renderer_opengl/gl_global_cache.h" 11#include "video_core/renderer_opengl/gl_global_cache.h"
8#include "video_core/renderer_opengl/gl_rasterizer.h" 12#include "video_core/renderer_opengl/gl_rasterizer.h"
13#include "video_core/renderer_opengl/gl_shader_decompiler.h"
9#include "video_core/renderer_opengl/utils.h" 14#include "video_core/renderer_opengl/utils.h"
10 15
11namespace OpenGL { 16namespace OpenGL {
@@ -18,7 +23,72 @@ CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{
18 LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory"); 23 LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory");
19} 24}
20 25
26void CachedGlobalRegion::Reload(u32 size_) {
27 constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize);
28
29 size = size_;
30 if (size > max_size) {
31 size = max_size;
32 LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_,
33 max_size);
34 }
35
36 // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
37 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
38 glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW);
39}
40
41GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
42 const auto search{reserve.find(addr)};
43 if (search == reserve.end()) {
44 return {};
45 }
46 return search->second;
47}
48
49GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) {
50 GlobalRegion region{TryGetReservedGlobalRegion(addr, size)};
51 if (!region) {
52 // No reserved surface available, create a new one and reserve it
53 region = std::make_shared<CachedGlobalRegion>(addr, size);
54 ReserveGlobalRegion(region);
55 }
56 region->Reload(size);
57 return region;
58}
59
60void GlobalRegionCacheOpenGL::ReserveGlobalRegion(const GlobalRegion& region) {
61 reserve[region->GetAddr()] = region;
62}
63
21GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) 64GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
22 : RasterizerCache{rasterizer} {} 65 : RasterizerCache{rasterizer} {}
23 66
67GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
68 const GLShader::GlobalMemoryEntry& global_region,
69 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
70
71 auto& gpu{Core::System::GetInstance().GPU()};
72 const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)];
73 const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress(
74 cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset());
75 ASSERT(cbuf_addr);
76
77 const auto actual_addr_gpu = Memory::Read64(*cbuf_addr);
78 const auto size = Memory::Read32(*cbuf_addr + 8);
79 const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu);
80 ASSERT(actual_addr);
81
82 // Look up global region in the cache based on address
83 GlobalRegion region = TryGet(*actual_addr);
84
85 if (!region) {
86 // No global region found - create a new one
87 region = GetUncachedGlobalRegion(*actual_addr, size);
88 Register(region);
89 }
90
91 return region;
92}
93
24} // namespace OpenGL 94} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index 406a735bc..37830bb7c 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -5,9 +5,13 @@
5#pragma once 5#pragma once
6 6
7#include <memory> 7#include <memory>
8#include <unordered_map>
9
8#include <glad/glad.h> 10#include <glad/glad.h>
9 11
12#include "common/assert.h"
10#include "common/common_types.h" 13#include "common/common_types.h"
14#include "video_core/engines/maxwell_3d.h"
11#include "video_core/rasterizer_cache.h" 15#include "video_core/rasterizer_cache.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 16#include "video_core/renderer_opengl/gl_resource_manager.h"
13 17
@@ -40,6 +44,9 @@ public:
40 return buffer.handle; 44 return buffer.handle;
41 } 45 }
42 46
47 /// Reloads the global region from guest memory
48 void Reload(u32 size_);
49
43 // TODO(Rodrigo): When global memory is written (STG), implement flushing 50 // TODO(Rodrigo): When global memory is written (STG), implement flushing
44 void Flush() override { 51 void Flush() override {
45 UNIMPLEMENTED(); 52 UNIMPLEMENTED();
@@ -55,6 +62,17 @@ private:
55class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> { 62class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
56public: 63public:
57 explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer); 64 explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer);
65
66 /// Gets the global region described by the given global memory entry for the specified shader stage
67 GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
68 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
69
70private:
71 GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;
72 GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size);
73 void ReserveGlobalRegion(const GlobalRegion& region);
74
75 std::unordered_map<VAddr, GlobalRegion> reserve;
58}; 76};
59 77
60} // namespace OpenGL 78} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 71829fee0..ee313cb2f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -297,10 +297,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
297 MICROPROFILE_SCOPE(OpenGL_Shader); 297 MICROPROFILE_SCOPE(OpenGL_Shader);
298 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 298 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
299 299
300 // Next available bindpoints to use when uploading the const buffers and textures to the GLSL 300 BaseBindings base_bindings;
301 // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
302 u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
303 u32 current_texture_bindpoint = 0;
304 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 301 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
305 302
306 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 303 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
@@ -324,43 +321,35 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
324 const GLintptr offset = buffer_cache.UploadHostMemory( 321 const GLintptr offset = buffer_cache.UploadHostMemory(
325 &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment)); 322 &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment));
326 323
327 // Bind the buffer 324 // Bind the emulation info buffer
328 glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(stage), buffer_cache.GetHandle(), 325 glBindBufferRange(GL_UNIFORM_BUFFER, base_bindings.cbuf, buffer_cache.GetHandle(), offset,
329 offset, static_cast<GLsizeiptr>(sizeof(ubo))); 326 static_cast<GLsizeiptr>(sizeof(ubo)));
330 327
331 Shader shader{shader_cache.GetStageProgram(program)}; 328 Shader shader{shader_cache.GetStageProgram(program)};
329 const auto [program_handle, next_bindings] =
330 shader->GetProgramHandle(primitive_mode, base_bindings);
332 331
333 switch (program) { 332 switch (program) {
334 case Maxwell::ShaderProgram::VertexA: 333 case Maxwell::ShaderProgram::VertexA:
335 case Maxwell::ShaderProgram::VertexB: { 334 case Maxwell::ShaderProgram::VertexB:
336 shader_program_manager->UseProgrammableVertexShader( 335 shader_program_manager->UseProgrammableVertexShader(program_handle);
337 shader->GetProgramHandle(primitive_mode));
338 break; 336 break;
339 } 337 case Maxwell::ShaderProgram::Geometry:
340 case Maxwell::ShaderProgram::Geometry: { 338 shader_program_manager->UseProgrammableGeometryShader(program_handle);
341 shader_program_manager->UseProgrammableGeometryShader(
342 shader->GetProgramHandle(primitive_mode));
343 break; 339 break;
344 } 340 case Maxwell::ShaderProgram::Fragment:
345 case Maxwell::ShaderProgram::Fragment: { 341 shader_program_manager->UseProgrammableFragmentShader(program_handle);
346 shader_program_manager->UseProgrammableFragmentShader(
347 shader->GetProgramHandle(primitive_mode));
348 break; 342 break;
349 }
350 default: 343 default:
351 LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, 344 LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
352 shader_config.enable.Value(), shader_config.offset); 345 shader_config.enable.Value(), shader_config.offset);
353 UNREACHABLE(); 346 UNREACHABLE();
354 } 347 }
355 348
356 // Configure the const buffers for this shader stage. 349 const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
357 current_constbuffer_bindpoint = 350 SetupConstBuffers(stage_enum, shader, program_handle, base_bindings);
358 SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode, 351 SetupGlobalRegions(stage_enum, shader, program_handle, base_bindings);
359 current_constbuffer_bindpoint); 352 SetupTextures(stage_enum, shader, program_handle, base_bindings);
360
361 // Configure the textures for this shader stage.
362 current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
363 primitive_mode, current_texture_bindpoint);
364 353
365 // Workaround for Intel drivers. 354 // Workaround for Intel drivers.
366 // When a clip distance is enabled but not set in the shader it crops parts of the screen 355 // When a clip distance is enabled but not set in the shader it crops parts of the screen
@@ -375,6 +364,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
375 // VertexB was combined with VertexA, so we skip the VertexB iteration 364 // VertexB was combined with VertexA, so we skip the VertexB iteration
376 index++; 365 index++;
377 } 366 }
367
368 base_bindings = next_bindings;
378 } 369 }
379 370
380 SyncClipEnabled(clip_distances); 371 SyncClipEnabled(clip_distances);
@@ -924,8 +915,9 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
924 } 915 }
925} 916}
926 917
927u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shader, 918void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
928 GLenum primitive_mode, u32 current_bindpoint) { 919 const Shader& shader, GLuint program_handle,
920 BaseBindings base_bindings) {
929 MICROPROFILE_SCOPE(OpenGL_UBO); 921 MICROPROFILE_SCOPE(OpenGL_UBO);
930 const auto& gpu = Core::System::GetInstance().GPU(); 922 const auto& gpu = Core::System::GetInstance().GPU();
931 const auto& maxwell3d = gpu.Maxwell3D(); 923 const auto& maxwell3d = gpu.Maxwell3D();
@@ -973,75 +965,73 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
973 size = Common::AlignUp(size, sizeof(GLvec4)); 965 size = Common::AlignUp(size, sizeof(GLvec4));
974 ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); 966 ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
975 967
976 GLintptr const_buffer_offset = buffer_cache.UploadMemory( 968 const GLintptr const_buffer_offset = buffer_cache.UploadMemory(
977 buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment)); 969 buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment));
978 970
979 // Now configure the bindpoint of the buffer inside the shader
980 glUniformBlockBinding(shader->GetProgramHandle(primitive_mode),
981 shader->GetProgramResourceIndex(used_buffer),
982 current_bindpoint + bindpoint);
983
984 // Prepare values for multibind 971 // Prepare values for multibind
985 bind_buffers[bindpoint] = buffer_cache.GetHandle(); 972 bind_buffers[bindpoint] = buffer_cache.GetHandle();
986 bind_offsets[bindpoint] = const_buffer_offset; 973 bind_offsets[bindpoint] = const_buffer_offset;
987 bind_sizes[bindpoint] = size; 974 bind_sizes[bindpoint] = size;
988 } 975 }
989 976
990 glBindBuffersRange(GL_UNIFORM_BUFFER, current_bindpoint, static_cast<GLsizei>(entries.size()), 977 // The first binding is reserved for emulation values
978 const GLuint ubo_base_binding = base_bindings.cbuf + 1;
979 glBindBuffersRange(GL_UNIFORM_BUFFER, ubo_base_binding, static_cast<GLsizei>(entries.size()),
991 bind_buffers.data(), bind_offsets.data(), bind_sizes.data()); 980 bind_buffers.data(), bind_offsets.data(), bind_sizes.data());
981}
982
983void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
984 const Shader& shader, GLenum primitive_mode,
985 BaseBindings base_bindings) {
986 // TODO(Rodrigo): Use ARB_multi_bind here
987 const auto& entries = shader->GetShaderEntries().global_memory_entries;
992 988
993 return current_bindpoint + static_cast<u32>(entries.size()); 989 for (u32 bindpoint = 0; bindpoint < static_cast<u32>(entries.size()); ++bindpoint) {
990 const auto& entry = entries[bindpoint];
991 const u32 current_bindpoint = base_bindings.gmem + bindpoint;
992 const auto& region = global_cache.GetGlobalRegion(entry, stage);
993
994 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, current_bindpoint, region->GetBufferHandle());
995 }
994} 996}
995 997
996u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, 998void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
997 GLenum primitive_mode, u32 current_unit) { 999 GLuint program_handle, BaseBindings base_bindings) {
998 MICROPROFILE_SCOPE(OpenGL_Texture); 1000 MICROPROFILE_SCOPE(OpenGL_Texture);
999 const auto& gpu = Core::System::GetInstance().GPU(); 1001 const auto& gpu = Core::System::GetInstance().GPU();
1000 const auto& maxwell3d = gpu.Maxwell3D(); 1002 const auto& maxwell3d = gpu.Maxwell3D();
1001 const auto& entries = shader->GetShaderEntries().samplers; 1003 const auto& entries = shader->GetShaderEntries().samplers;
1002 1004
1003 ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units), 1005 ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units),
1004 "Exceeded the number of active textures."); 1006 "Exceeded the number of active textures.");
1005 1007
1006 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { 1008 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
1007 const auto& entry = entries[bindpoint]; 1009 const auto& entry = entries[bindpoint];
1008 const u32 current_bindpoint = current_unit + bindpoint; 1010 const u32 current_bindpoint = base_bindings.sampler + bindpoint;
1009 1011 auto& unit = state.texture_units[current_bindpoint];
1010 // Bind the uniform to the sampler.
1011
1012 glProgramUniform1i(shader->GetProgramHandle(primitive_mode),
1013 shader->GetUniformLocation(entry), current_bindpoint);
1014 1012
1015 const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset()); 1013 const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
1016
1017 if (!texture.enabled) { 1014 if (!texture.enabled) {
1018 state.texture_units[current_bindpoint].texture = 0; 1015 unit.texture = 0;
1019 continue; 1016 continue;
1020 } 1017 }
1021 1018
1022 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); 1019 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
1020
1023 Surface surface = res_cache.GetTextureSurface(texture, entry); 1021 Surface surface = res_cache.GetTextureSurface(texture, entry);
1024 if (surface != nullptr) { 1022 if (surface != nullptr) {
1025 const GLuint handle = 1023 unit.texture =
1026 entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle; 1024 entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle;
1027 const GLenum target = entry.IsArray() ? surface->TargetLayer() : surface->Target(); 1025 unit.target = entry.IsArray() ? surface->TargetLayer() : surface->Target();
1028 state.texture_units[current_bindpoint].texture = handle; 1026 unit.swizzle.r = MaxwellToGL::SwizzleSource(texture.tic.x_source);
1029 state.texture_units[current_bindpoint].target = target; 1027 unit.swizzle.g = MaxwellToGL::SwizzleSource(texture.tic.y_source);
1030 state.texture_units[current_bindpoint].swizzle.r = 1028 unit.swizzle.b = MaxwellToGL::SwizzleSource(texture.tic.z_source);
1031 MaxwellToGL::SwizzleSource(texture.tic.x_source); 1029 unit.swizzle.a = MaxwellToGL::SwizzleSource(texture.tic.w_source);
1032 state.texture_units[current_bindpoint].swizzle.g =
1033 MaxwellToGL::SwizzleSource(texture.tic.y_source);
1034 state.texture_units[current_bindpoint].swizzle.b =
1035 MaxwellToGL::SwizzleSource(texture.tic.z_source);
1036 state.texture_units[current_bindpoint].swizzle.a =
1037 MaxwellToGL::SwizzleSource(texture.tic.w_source);
1038 } else { 1030 } else {
1039 // Can occur when texture addr is null or its memory is unmapped/invalid 1031 // Can occur when texture addr is null or its memory is unmapped/invalid
1040 state.texture_units[current_bindpoint].texture = 0; 1032 unit.texture = 0;
1041 } 1033 }
1042 } 1034 }
1043
1044 return current_unit + static_cast<u32>(entries.size());
1045} 1035}
1046 1036
1047void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { 1037void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 21c51f874..a103692f9 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -127,25 +127,18 @@ private:
127 bool using_depth_fb = true, bool preserve_contents = true, 127 bool using_depth_fb = true, bool preserve_contents = true,
128 std::optional<std::size_t> single_color_target = {}); 128 std::optional<std::size_t> single_color_target = {});
129 129
130 /** 130 /// Configures the current constbuffers to use for the draw command.
131 * Configures the current constbuffers to use for the draw command. 131 void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
132 * @param stage The shader stage to configure buffers for. 132 GLuint program_handle, BaseBindings base_bindings);
133 * @param shader The shader object that contains the specified stage. 133
134 * @param current_bindpoint The offset at which to start counting new buffer bindpoints. 134 /// Configures the current global memory entries to use for the draw command.
135 * @returns The next available bindpoint for use in the next shader stage. 135 void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
136 */ 136 const Shader& shader, GLenum primitive_mode,
137 u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader, 137 BaseBindings base_bindings);
138 GLenum primitive_mode, u32 current_bindpoint); 138
139 139 /// Configures the current textures to use for the draw command.
140 /** 140 void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
141 * Configures the current textures to use for the draw command. 141 GLuint program_handle, BaseBindings base_bindings);
142 * @param stage The shader stage to configure textures for.
143 * @param shader The shader object that contains the specified stage.
144 * @param current_unit The offset at which to start counting unused texture units.
145 * @returns The next available bindpoint for use in the next shader stage.
146 */
147 u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
148 GLenum primitive_mode, u32 current_unit);
149 142
150 /// Syncs the viewport and depth range to match the guest state 143 /// Syncs the viewport and depth range to match the guest state
151 void SyncViewport(OpenGLState& current_state); 144 void SyncViewport(OpenGLState& current_state);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index b3aca39af..90eda7814 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -34,36 +34,25 @@ static ProgramCode GetShaderCode(VAddr addr) {
34 return program_code; 34 return program_code;
35} 35}
36 36
37/// Helper function to set shader uniform block bindings for a single shader stage 37/// Gets the shader type from a Maxwell program type
38static void SetShaderUniformBlockBinding(GLuint shader, const char* name, 38constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) {
39 Maxwell::ShaderStage binding, std::size_t expected_size) { 39 switch (program_type) {
40 const GLuint ub_index = glGetUniformBlockIndex(shader, name); 40 case Maxwell::ShaderProgram::VertexA:
41 if (ub_index == GL_INVALID_INDEX) { 41 case Maxwell::ShaderProgram::VertexB:
42 return; 42 return GL_VERTEX_SHADER;
43 case Maxwell::ShaderProgram::Geometry:
44 return GL_GEOMETRY_SHADER;
45 case Maxwell::ShaderProgram::Fragment:
46 return GL_FRAGMENT_SHADER;
47 default:
48 return GL_NONE;
43 } 49 }
44
45 GLint ub_size = 0;
46 glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
47 ASSERT_MSG(static_cast<std::size_t>(ub_size) == expected_size,
48 "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size);
49 glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
50}
51
52/// Sets shader uniform block bindings for an entire shader program
53static void SetShaderUniformBlockBindings(GLuint shader) {
54 SetShaderUniformBlockBinding(shader, "vs_config", Maxwell::ShaderStage::Vertex,
55 sizeof(GLShader::MaxwellUniformData));
56 SetShaderUniformBlockBinding(shader, "gs_config", Maxwell::ShaderStage::Geometry,
57 sizeof(GLShader::MaxwellUniformData));
58 SetShaderUniformBlockBinding(shader, "fs_config", Maxwell::ShaderStage::Fragment,
59 sizeof(GLShader::MaxwellUniformData));
60} 50}
61 51
62CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) 52CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
63 : addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} { 53 : addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} {
64 54
65 GLShader::ProgramResult program_result; 55 GLShader::ProgramResult program_result;
66 GLenum gl_type{};
67 56
68 switch (program_type) { 57 switch (program_type) {
69 case Maxwell::ShaderProgram::VertexA: 58 case Maxwell::ShaderProgram::VertexA:
@@ -74,17 +63,14 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
74 case Maxwell::ShaderProgram::VertexB: 63 case Maxwell::ShaderProgram::VertexB:
75 CalculateProperties(); 64 CalculateProperties();
76 program_result = GLShader::GenerateVertexShader(setup); 65 program_result = GLShader::GenerateVertexShader(setup);
77 gl_type = GL_VERTEX_SHADER;
78 break; 66 break;
79 case Maxwell::ShaderProgram::Geometry: 67 case Maxwell::ShaderProgram::Geometry:
80 CalculateProperties(); 68 CalculateProperties();
81 program_result = GLShader::GenerateGeometryShader(setup); 69 program_result = GLShader::GenerateGeometryShader(setup);
82 gl_type = GL_GEOMETRY_SHADER;
83 break; 70 break;
84 case Maxwell::ShaderProgram::Fragment: 71 case Maxwell::ShaderProgram::Fragment:
85 CalculateProperties(); 72 CalculateProperties();
86 program_result = GLShader::GenerateFragmentShader(setup); 73 program_result = GLShader::GenerateFragmentShader(setup);
87 gl_type = GL_FRAGMENT_SHADER;
88 break; 74 break;
89 default: 75 default:
90 LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); 76 LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type));
@@ -92,59 +78,105 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
92 return; 78 return;
93 } 79 }
94 80
81 code = program_result.first;
95 entries = program_result.second; 82 entries = program_result.second;
96 shader_length = entries.shader_length; 83 shader_length = entries.shader_length;
84}
97 85
98 if (program_type != Maxwell::ShaderProgram::Geometry) { 86std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode,
99 OGLShader shader; 87 BaseBindings base_bindings) {
100 shader.Create(program_result.first.c_str(), gl_type); 88 GLuint handle{};
101 program.Create(true, shader.handle); 89 if (program_type == Maxwell::ShaderProgram::Geometry) {
102 SetShaderUniformBlockBindings(program.handle); 90 handle = GetGeometryShader(primitive_mode, base_bindings);
103 LabelGLObject(GL_PROGRAM, program.handle, addr);
104 } else { 91 } else {
105 // Store shader's code to lazily build it on draw 92 const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings);
106 geometry_programs.code = program_result.first; 93 auto& program = entry->second;
94 if (is_cache_miss) {
95 std::string source = AllocateBindings(base_bindings);
96 source += code;
97
98 OGLShader shader;
99 shader.Create(source.c_str(), GetShaderType(program_type));
100 program.Create(true, shader.handle);
101 LabelGLObject(GL_PROGRAM, program.handle, addr);
102 }
103
104 handle = program.handle;
107 } 105 }
106
107 // Add const buffer and samplers offset reserved by this shader. One UBO binding is reserved for
108 // emulation values
109 base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + 1;
110 base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
111 base_bindings.sampler += static_cast<u32>(entries.samplers.size());
112
113 return {handle, base_bindings};
108} 114}
109 115
110GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) { 116std::string CachedShader::AllocateBindings(BaseBindings base_bindings) {
111 const auto search{resource_cache.find(buffer.GetHash())}; 117 std::string code = "#version 430 core\n";
112 if (search == resource_cache.end()) { 118 code += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
113 const GLuint index{ 119
114 glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())}; 120 for (const auto& cbuf : entries.const_buffers) {
115 resource_cache[buffer.GetHash()] = index; 121 code += fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++);
116 return index;
117 } 122 }
118 123
119 return search->second; 124 for (const auto& gmem : entries.global_memory_entries) {
120} 125 code += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(),
126 gmem.GetCbufOffset(), base_bindings.gmem++);
127 }
121 128
122GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) { 129 for (const auto& sampler : entries.samplers) {
123 const auto search{uniform_cache.find(sampler.GetHash())}; 130 code += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
124 if (search == uniform_cache.end()) { 131 base_bindings.sampler++);
125 const GLint index{glGetUniformLocation(program.handle, sampler.GetName().c_str())};
126 uniform_cache[sampler.GetHash()] = index;
127 return index;
128 } 132 }
129 133
130 return search->second; 134 return code;
135}
136
137GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) {
138 const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings);
139 auto& programs = entry->second;
140
141 switch (primitive_mode) {
142 case GL_POINTS:
143 return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints");
144 case GL_LINES:
145 case GL_LINE_STRIP:
146 return LazyGeometryProgram(programs.lines, base_bindings, "lines", 2, "ShaderLines");
147 case GL_LINES_ADJACENCY:
148 case GL_LINE_STRIP_ADJACENCY:
149 return LazyGeometryProgram(programs.lines_adjacency, base_bindings, "lines_adjacency", 4,
150 "ShaderLinesAdjacency");
151 case GL_TRIANGLES:
152 case GL_TRIANGLE_STRIP:
153 case GL_TRIANGLE_FAN:
154 return LazyGeometryProgram(programs.triangles, base_bindings, "triangles", 3,
155 "ShaderTriangles");
156 case GL_TRIANGLES_ADJACENCY:
157 case GL_TRIANGLE_STRIP_ADJACENCY:
158 return LazyGeometryProgram(programs.triangles_adjacency, base_bindings,
159 "triangles_adjacency", 6, "ShaderTrianglesAdjacency");
160 default:
161 UNREACHABLE_MSG("Unknown primitive mode.");
162 return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints");
163 }
131} 164}
132 165
133GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program, 166GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings,
134 const std::string& glsl_topology, u32 max_vertices, 167 const std::string& glsl_topology, u32 max_vertices,
135 const std::string& debug_name) { 168 const std::string& debug_name) {
136 if (target_program.handle != 0) { 169 if (target_program.handle != 0) {
137 return target_program.handle; 170 return target_program.handle;
138 } 171 }
139 std::string source = "#version 430 core\n"; 172 std::string source = AllocateBindings(base_bindings);
140 source += "layout (" + glsl_topology + ") in;\n"; 173 source += "layout (" + glsl_topology + ") in;\n";
141 source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; 174 source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
142 source += geometry_programs.code; 175 source += code;
143 176
144 OGLShader shader; 177 OGLShader shader;
145 shader.Create(source.c_str(), GL_GEOMETRY_SHADER); 178 shader.Create(source.c_str(), GL_GEOMETRY_SHADER);
146 target_program.Create(true, shader.handle); 179 target_program.Create(true, shader.handle);
147 SetShaderUniformBlockBindings(target_program.handle);
148 LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name); 180 LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name);
149 return target_program.handle; 181 return target_program.handle;
150}; 182};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index e0887dd7b..904d15dd0 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -7,6 +7,9 @@
7#include <array> 7#include <array>
8#include <map> 8#include <map>
9#include <memory> 9#include <memory>
10#include <tuple>
11
12#include <glad/glad.h>
10 13
11#include "common/assert.h" 14#include "common/assert.h"
12#include "common/common_types.h" 15#include "common/common_types.h"
@@ -23,6 +26,16 @@ class RasterizerOpenGL;
23using Shader = std::shared_ptr<CachedShader>; 26using Shader = std::shared_ptr<CachedShader>;
24using Maxwell = Tegra::Engines::Maxwell3D::Regs; 27using Maxwell = Tegra::Engines::Maxwell3D::Regs;
25 28
29struct BaseBindings {
30 u32 cbuf{};
31 u32 gmem{};
32 u32 sampler{};
33
34 bool operator<(const BaseBindings& rhs) const {
35 return std::tie(cbuf, gmem, sampler) < std::tie(rhs.cbuf, rhs.gmem, rhs.sampler);
36 }
37};
38
26class CachedShader final : public RasterizerCacheObject { 39class CachedShader final : public RasterizerCacheObject {
27public: 40public:
28 CachedShader(VAddr addr, Maxwell::ShaderProgram program_type); 41 CachedShader(VAddr addr, Maxwell::ShaderProgram program_type);
@@ -44,70 +57,45 @@ public:
44 } 57 }
45 58
46 /// Gets the GL program handle for the shader 59 /// Gets the GL program handle for the shader
47 GLuint GetProgramHandle(GLenum primitive_mode) { 60 std::tuple<GLuint, BaseBindings> GetProgramHandle(GLenum primitive_mode,
48 if (program_type != Maxwell::ShaderProgram::Geometry) { 61 BaseBindings base_bindings);
49 return program.handle;
50 }
51 switch (primitive_mode) {
52 case GL_POINTS:
53 return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints");
54 case GL_LINES:
55 case GL_LINE_STRIP:
56 return LazyGeometryProgram(geometry_programs.lines, "lines", 2, "ShaderLines");
57 case GL_LINES_ADJACENCY:
58 case GL_LINE_STRIP_ADJACENCY:
59 return LazyGeometryProgram(geometry_programs.lines_adjacency, "lines_adjacency", 4,
60 "ShaderLinesAdjacency");
61 case GL_TRIANGLES:
62 case GL_TRIANGLE_STRIP:
63 case GL_TRIANGLE_FAN:
64 return LazyGeometryProgram(geometry_programs.triangles, "triangles", 3,
65 "ShaderTriangles");
66 case GL_TRIANGLES_ADJACENCY:
67 case GL_TRIANGLE_STRIP_ADJACENCY:
68 return LazyGeometryProgram(geometry_programs.triangles_adjacency, "triangles_adjacency",
69 6, "ShaderTrianglesAdjacency");
70 default:
71 UNREACHABLE_MSG("Unknown primitive mode.");
72 return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints");
73 }
74 }
75 62
76 /// Gets the GL program resource location for the specified resource, caching as needed 63private:
77 GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer); 64 // Geometry programs. These are needed because GLSL needs an input topology but it's not
65 // declared by the hardware. Workaround this issue by generating a different shader per input
66 // topology class.
67 struct GeometryPrograms {
68 OGLProgram points;
69 OGLProgram lines;
70 OGLProgram lines_adjacency;
71 OGLProgram triangles;
72 OGLProgram triangles_adjacency;
73 };
78 74
79 /// Gets the GL uniform location for the specified resource, caching as needed 75 std::string AllocateBindings(BaseBindings base_bindings);
80 GLint GetUniformLocation(const GLShader::SamplerEntry& sampler); 76
77 GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings);
81 78
82private:
83 /// Generates a geometry shader or returns one that already exists. 79 /// Generates a geometry shader or returns one that already exists.
84 GLuint LazyGeometryProgram(OGLProgram& target_program, const std::string& glsl_topology, 80 GLuint LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings,
85 u32 max_vertices, const std::string& debug_name); 81 const std::string& glsl_topology, u32 max_vertices,
82 const std::string& debug_name);
86 83
87 void CalculateProperties(); 84 void CalculateProperties();
88 85
89 VAddr addr; 86 VAddr addr{};
90 std::size_t shader_length; 87 std::size_t shader_length{};
91 Maxwell::ShaderProgram program_type; 88 Maxwell::ShaderProgram program_type{};
92 GLShader::ShaderSetup setup; 89 GLShader::ShaderSetup setup;
93 GLShader::ShaderEntries entries; 90 GLShader::ShaderEntries entries;
94 91
95 // Non-geometry program. 92 std::string code;
96 OGLProgram program;
97 93
98 // Geometry programs. These are needed because GLSL needs an input topology but it's not 94 std::map<BaseBindings, OGLProgram> programs;
99 // declared by the hardware. Workaround this issue by generating a different shader per input 95 std::map<BaseBindings, GeometryPrograms> geometry_programs;
100 // topology class.
101 struct {
102 std::string code;
103 OGLProgram points;
104 OGLProgram lines;
105 OGLProgram lines_adjacency;
106 OGLProgram triangles;
107 OGLProgram triangles_adjacency;
108 } geometry_programs;
109 96
110 std::map<u32, GLuint> resource_cache; 97 std::map<u32, GLuint> cbuf_resource_cache;
98 std::map<u32, GLuint> gmem_resource_cache;
111 std::map<u32, GLint> uniform_cache; 99 std::map<u32, GLint> uniform_cache;
112}; 100};
113 101
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 3411cf9e6..004245431 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -34,6 +34,8 @@ using Operation = const OperationNode&;
34enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; 34enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
35constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 35constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
36 static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); 36 static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
37constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
38 static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
37 39
38enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; 40enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
39 41
@@ -143,6 +145,7 @@ public:
143 DeclareInputAttributes(); 145 DeclareInputAttributes();
144 DeclareOutputAttributes(); 146 DeclareOutputAttributes();
145 DeclareConstantBuffers(); 147 DeclareConstantBuffers();
148 DeclareGlobalMemory();
146 DeclareSamplers(); 149 DeclareSamplers();
147 150
148 code.AddLine("void execute_" + suffix + "() {"); 151 code.AddLine("void execute_" + suffix + "() {");
@@ -190,12 +193,15 @@ public:
190 ShaderEntries GetShaderEntries() const { 193 ShaderEntries GetShaderEntries() const {
191 ShaderEntries entries; 194 ShaderEntries entries;
192 for (const auto& cbuf : ir.GetConstantBuffers()) { 195 for (const auto& cbuf : ir.GetConstantBuffers()) {
193 ConstBufferEntry desc(cbuf.second, stage, GetConstBufferBlock(cbuf.first), cbuf.first); 196 entries.const_buffers.emplace_back(cbuf.second, stage, GetConstBufferBlock(cbuf.first),
194 entries.const_buffers.push_back(desc); 197 cbuf.first);
195 } 198 }
196 for (const auto& sampler : ir.GetSamplers()) { 199 for (const auto& sampler : ir.GetSamplers()) {
197 SamplerEntry desc(sampler, stage, GetSampler(sampler)); 200 entries.samplers.emplace_back(sampler, stage, GetSampler(sampler));
198 entries.samplers.push_back(desc); 201 }
202 for (const auto& gmem : ir.GetGlobalMemoryBases()) {
203 entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset, stage,
204 GetGlobalMemoryBlock(gmem));
199 } 205 }
200 entries.clip_distances = ir.GetClipDistances(); 206 entries.clip_distances = ir.GetClipDistances();
201 entries.shader_length = ir.GetLength(); 207 entries.shader_length = ir.GetLength();
@@ -368,13 +374,26 @@ private:
368 void DeclareConstantBuffers() { 374 void DeclareConstantBuffers() {
369 for (const auto& entry : ir.GetConstantBuffers()) { 375 for (const auto& entry : ir.GetConstantBuffers()) {
370 const auto [index, size] = entry; 376 const auto [index, size] = entry;
371 code.AddLine("layout (std140) uniform " + GetConstBufferBlock(index) + " {"); 377 code.AddLine("layout (std140, binding = CBUF_BINDING_" + std::to_string(index) +
378 ") uniform " + GetConstBufferBlock(index) + " {");
372 code.AddLine(" vec4 " + GetConstBuffer(index) + "[MAX_CONSTBUFFER_ELEMENTS];"); 379 code.AddLine(" vec4 " + GetConstBuffer(index) + "[MAX_CONSTBUFFER_ELEMENTS];");
373 code.AddLine("};"); 380 code.AddLine("};");
374 code.AddNewLine(); 381 code.AddNewLine();
375 } 382 }
376 } 383 }
377 384
385 void DeclareGlobalMemory() {
386 for (const auto& entry : ir.GetGlobalMemoryBases()) {
387 const std::string binding =
388 fmt::format("GMEM_BINDING_{}_{}", entry.cbuf_index, entry.cbuf_offset);
389 code.AddLine("layout (std430, binding = " + binding + ") buffer " +
390 GetGlobalMemoryBlock(entry) + " {");
391 code.AddLine(" float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];");
392 code.AddLine("};");
393 code.AddNewLine();
394 }
395 }
396
378 void DeclareSamplers() { 397 void DeclareSamplers() {
379 const auto& samplers = ir.GetSamplers(); 398 const auto& samplers = ir.GetSamplers();
380 for (const auto& sampler : samplers) { 399 for (const auto& sampler : samplers) {
@@ -398,7 +417,8 @@ private:
398 if (sampler.IsShadow()) 417 if (sampler.IsShadow())
399 sampler_type += "Shadow"; 418 sampler_type += "Shadow";
400 419
401 code.AddLine("uniform " + sampler_type + ' ' + GetSampler(sampler) + ';'); 420 code.AddLine("layout (binding = SAMPLER_BINDING_" + std::to_string(sampler.GetIndex()) +
421 ") uniform " + sampler_type + ' ' + GetSampler(sampler) + ';');
402 } 422 }
403 if (!samplers.empty()) 423 if (!samplers.empty())
404 code.AddNewLine(); 424 code.AddNewLine();
@@ -538,6 +558,12 @@ private:
538 UNREACHABLE_MSG("Unmanaged offset node type"); 558 UNREACHABLE_MSG("Unmanaged offset node type");
539 } 559 }
540 560
561 } else if (const auto gmem = std::get_if<GmemNode>(node)) {
562 const std::string real = Visit(gmem->GetRealAddress());
563 const std::string base = Visit(gmem->GetBaseAddress());
564 const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4";
565 return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
566
541 } else if (const auto lmem = std::get_if<LmemNode>(node)) { 567 } else if (const auto lmem = std::get_if<LmemNode>(node)) {
542 return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); 568 return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
543 569
@@ -1471,6 +1497,15 @@ private:
1471 return GetDeclarationWithSuffix(index, "cbuf"); 1497 return GetDeclarationWithSuffix(index, "cbuf");
1472 } 1498 }
1473 1499
1500 std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
1501 return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix);
1502 }
1503
1504 std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const {
1505 return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset,
1506 suffix);
1507 }
1508
1474 std::string GetConstBufferBlock(u32 index) const { 1509 std::string GetConstBufferBlock(u32 index) const {
1475 return GetDeclarationWithSuffix(index, "cbuf_block"); 1510 return GetDeclarationWithSuffix(index, "cbuf_block");
1476 } 1511 }
@@ -1505,8 +1540,10 @@ private:
1505}; 1540};
1506 1541
1507std::string GetCommonDeclarations() { 1542std::string GetCommonDeclarations() {
1508 return "#define MAX_CONSTBUFFER_ELEMENTS " + std::to_string(MAX_CONSTBUFFER_ELEMENTS) + 1543 const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
1509 "\n" 1544 const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
1545 return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" +
1546 "#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" +
1510 "#define ftoi floatBitsToInt\n" 1547 "#define ftoi floatBitsToInt\n"
1511 "#define ftou floatBitsToUint\n" 1548 "#define ftou floatBitsToUint\n"
1512 "#define itof intBitsToFloat\n" 1549 "#define itof intBitsToFloat\n"
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 396a560d8..0856a1361 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -38,10 +38,6 @@ public:
38 return index; 38 return index;
39 } 39 }
40 40
41 u32 GetHash() const {
42 return (static_cast<u32>(stage) << 16) | index;
43 }
44
45private: 41private:
46 std::string name; 42 std::string name;
47 Maxwell::ShaderStage stage{}; 43 Maxwell::ShaderStage stage{};
@@ -62,18 +58,44 @@ public:
62 return stage; 58 return stage;
63 } 59 }
64 60
65 u32 GetHash() const { 61private:
66 return (static_cast<u32>(stage) << 16) | static_cast<u32>(GetIndex()); 62 std::string name;
63 Maxwell::ShaderStage stage{};
64};
65
66class GlobalMemoryEntry {
67public:
68 explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, Maxwell::ShaderStage stage,
69 std::string name)
70 : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, stage{stage}, name{std::move(name)} {}
71
72 u32 GetCbufIndex() const {
73 return cbuf_index;
74 }
75
76 u32 GetCbufOffset() const {
77 return cbuf_offset;
78 }
79
80 const std::string& GetName() const {
81 return name;
82 }
83
84 Maxwell::ShaderStage GetStage() const {
85 return stage;
67 } 86 }
68 87
69private: 88private:
70 std::string name; 89 u32 cbuf_index{};
90 u32 cbuf_offset{};
71 Maxwell::ShaderStage stage{}; 91 Maxwell::ShaderStage stage{};
92 std::string name;
72}; 93};
73 94
74struct ShaderEntries { 95struct ShaderEntries {
75 std::vector<ConstBufferEntry> const_buffers; 96 std::vector<ConstBufferEntry> const_buffers;
76 std::vector<SamplerEntry> samplers; 97 std::vector<SamplerEntry> samplers;
98 std::vector<GlobalMemoryEntry> global_memory_entries;
77 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 99 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
78 std::size_t shader_length{}; 100 std::size_t shader_length{};
79}; 101};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 446d1a93f..04e1db911 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -20,15 +20,14 @@ static constexpr u32 PROGRAM_OFFSET{10};
20ProgramResult GenerateVertexShader(const ShaderSetup& setup) { 20ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
21 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); 21 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
22 22
23 std::string out = "#version 430 core\n"; 23 std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
24 out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
25 out += "// Shader Unique Id: VS" + id + "\n\n"; 24 out += "// Shader Unique Id: VS" + id + "\n\n";
26 out += GetCommonDeclarations(); 25 out += GetCommonDeclarations();
27 26
28 out += R"( 27 out += R"(
29layout (location = 0) out vec4 position; 28layout (location = 0) out vec4 position;
30 29
31layout(std140) uniform vs_config { 30layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
32 vec4 viewport_flip; 31 vec4 viewport_flip;
33 uvec4 config_pack; // instance_id, flip_stage, y_direction, padding 32 uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
34 uvec4 alpha_test; 33 uvec4 alpha_test;
@@ -78,7 +77,6 @@ void main() {
78} 77}
79 78
80ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { 79ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
81 // Version is intentionally skipped in shader generation, it's added by the lazy compilation.
82 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); 80 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
83 81
84 std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; 82 std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
@@ -89,7 +87,7 @@ ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
89layout (location = 0) in vec4 gs_position[]; 87layout (location = 0) in vec4 gs_position[];
90layout (location = 0) out vec4 position; 88layout (location = 0) out vec4 position;
91 89
92layout (std140) uniform gs_config { 90layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
93 vec4 viewport_flip; 91 vec4 viewport_flip;
94 uvec4 config_pack; // instance_id, flip_stage, y_direction, padding 92 uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
95 uvec4 alpha_test; 93 uvec4 alpha_test;
@@ -112,8 +110,7 @@ void main() {
112ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { 110ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
113 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); 111 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
114 112
115 std::string out = "#version 430 core\n"; 113 std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
116 out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
117 out += "// Shader Unique Id: FS" + id + "\n\n"; 114 out += "// Shader Unique Id: FS" + id + "\n\n";
118 out += GetCommonDeclarations(); 115 out += GetCommonDeclarations();
119 116
@@ -129,7 +126,7 @@ layout (location = 7) out vec4 FragColor7;
129 126
130layout (location = 0) in vec4 position; 127layout (location = 0) in vec4 position;
131 128
132layout (std140) uniform fs_config { 129layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
133 vec4 viewport_flip; 130 vec4 viewport_flip;
134 uvec4 config_pack; // instance_id, flip_stage, y_direction, padding 131 uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
135 uvec4 alpha_test; 132 uvec4 alpha_test;
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index ae71672d6..04cb386b7 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -4,6 +4,7 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <vector> 6#include <vector>
7#include <fmt/format.h>
7 8
8#include "common/assert.h" 9#include "common/assert.h"
9#include "common/common_types.h" 10#include "common/common_types.h"
@@ -119,6 +120,54 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
119 } 120 }
120 break; 121 break;
121 } 122 }
123 case OpCode::Id::LDG: {
124 const u32 count = [&]() {
125 switch (instr.ldg.type) {
126 case Tegra::Shader::UniformType::Single:
127 return 1;
128 case Tegra::Shader::UniformType::Double:
129 return 2;
130 case Tegra::Shader::UniformType::Quad:
131 case Tegra::Shader::UniformType::UnsignedQuad:
132 return 4;
133 default:
134 UNIMPLEMENTED_MSG("Unimplemented LDG size!");
135 return 1;
136 }
137 }();
138
139 const Node addr_register = GetRegister(instr.gpr8);
140 const Node base_address = TrackCbuf(addr_register, code, static_cast<s64>(code.size()));
141 const auto cbuf = std::get_if<CbufNode>(base_address);
142 ASSERT(cbuf != nullptr);
143 const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
144 ASSERT(cbuf_offset_imm != nullptr);
145 const auto cbuf_offset = cbuf_offset_imm->GetValue() * 4;
146
147 bb.push_back(Comment(
148 fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
149
150 const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
151 used_global_memory_bases.insert(descriptor);
152
153 const Node immediate_offset =
154 Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value()));
155 const Node base_real_address =
156 Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register);
157
158 for (u32 i = 0; i < count; ++i) {
159 const Node it_offset = Immediate(i * 4);
160 const Node real_address =
161 Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset);
162 const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
163
164 SetTemporal(bb, i, gmem);
165 }
166 for (u32 i = 0; i < count; ++i) {
167 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
168 }
169 break;
170 }
122 case OpCode::Id::ST_A: { 171 case OpCode::Id::ST_A: {
123 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, 172 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
124 "Indirect attribute loads are not supported"); 173 "Indirect attribute loads are not supported");
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index ef8f94480..c4ecb2e3c 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -257,6 +257,15 @@ private:
257 bool is_indirect{}; 257 bool is_indirect{};
258}; 258};
259 259
260struct GlobalMemoryBase {
261 u32 cbuf_index{};
262 u32 cbuf_offset{};
263
264 bool operator<(const GlobalMemoryBase& rhs) const {
265 return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset);
266 }
267};
268
260struct MetaArithmetic { 269struct MetaArithmetic {
261 bool precise{}; 270 bool precise{};
262}; 271};
@@ -478,14 +487,26 @@ private:
478/// Global memory node 487/// Global memory node
// NOTE(review): this appears to be a side-by-side diff hunk; rows with two numbers show the
// pre-change line followed by the post-change line. The post-change class holds three values
// (real address, base address, descriptor) where the pre-change class held a single `address`.
479class GmemNode final { 488class GmemNode final {
480public: 489public:
// Post-change constructor: stores the given real address node, base address node and
// GlobalMemoryBase descriptor unchanged.
481 explicit constexpr GmemNode(Node address) : address{address} {} 490 explicit constexpr GmemNode(Node real_address, Node base_address,
491 const GlobalMemoryBase& descriptor)
492 : real_address{real_address}, base_address{base_address}, descriptor{descriptor} {}
482 493
// Returns the node passed as real_address (the LDG decoder passes register + immediate offset
// + per-element offset here).
483 Node GetAddress() const { 494 Node GetRealAddress() const {
484 return address; 495 return real_address;
496 }
497
// Returns the node passed as base_address (the LDG decoder passes the result of TrackCbuf).
498 Node GetBaseAddress() const {
499 return base_address;
500 }
501
// Returns the constant buffer index/offset pair this access was associated with.
502 const GlobalMemoryBase& GetDescriptor() const {
503 return descriptor;
485 } 504 }
486 505
487private: 506private:
// All members are const: they are set once by the constructor and never reassigned.
488 const Node address; 507 const Node real_address;
508 const Node base_address;
509 const GlobalMemoryBase descriptor;
489}; 510};
490 511
491/// Commentary, can be dropped 512/// Commentary, can be dropped
@@ -543,6 +564,10 @@ public:
543 return used_clip_distances; 564 return used_clip_distances;
544 } 565 }
545 566
/// Returns the set of global memory base descriptors recorded in used_global_memory_bases
/// (the LDG decoder inserts one entry per tracked base address).
567 const std::set<GlobalMemoryBase>& GetGlobalMemoryBases() const {
568 return used_global_memory_bases;
569 }
570
// Returns coverage_end multiplied by sizeof(u64), converted to std::size_t.
// NOTE(review): presumably the decoded program length in bytes; confirm what coverage_end counts.
546 std::size_t GetLength() const { 571 std::size_t GetLength() const {
547 return static_cast<std::size_t>(coverage_end * sizeof(u64)); 572 return static_cast<std::size_t>(coverage_end * sizeof(u64));
548 } 573 }
@@ -734,6 +759,10 @@ private:
734 void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, 759 void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
735 Node op_c, Node imm_lut, bool sets_cc); 760 Node op_c, Node imm_lut, bool sets_cc);
736 761
/// Follows `tracked` backwards through `code`, starting at `cursor`, until a constant buffer node
/// with an immediate offset is reached. Returns that node, or nullptr when none is found.
762 Node TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor);
763
/// Searches `code` backwards from `cursor` for an Assign operation whose target is the register
/// `tracked`. Returns the assigned source node and the position of the assignment, or a
/// value-initialized pair when there is no such assignment.
764 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const BasicBlock& code, s64 cursor);
765
737 template <typename... T> 766 template <typename... T>
738 Node Operation(OperationCode code, const T*... operands) { 767 Node Operation(OperationCode code, const T*... operands) {
739 return StoreNode(OperationNode(code, operands...)); 768 return StoreNode(OperationNode(code, operands...));
@@ -786,6 +815,7 @@ private:
786 std::map<u32, ConstBuffer> used_cbufs; 815 std::map<u32, ConstBuffer> used_cbufs;
787 std::set<Sampler> used_samplers; 816 std::set<Sampler> used_samplers;
788 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; 817 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
818 std::set<GlobalMemoryBase> used_global_memory_bases;
789 819
790 Tegra::Shader::Header header; 820 Tegra::Shader::Header header;
791}; 821};
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
new file mode 100644
index 000000000..d6d29ee9f
--- /dev/null
+++ b/src/video_core/shader/track.cpp
@@ -0,0 +1,76 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <utility>
7#include <variant>
8
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13namespace {
14std::pair<Node, s64> FindOperation(const BasicBlock& code, s64 cursor,
15 OperationCode operation_code) {
16 for (; cursor >= 0; --cursor) {
17 const Node node = code[cursor];
18 if (const auto operation = std::get_if<OperationNode>(node)) {
19 if (operation->GetCode() == operation_code)
20 return {node, cursor};
21 }
22 }
23 return {};
24}
25} // namespace
26
27Node ShaderIR::TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor) {
28 if (const auto cbuf = std::get_if<CbufNode>(tracked)) {
29 // Cbuf found, but it has to be immediate
30 return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr;
31 }
32 if (const auto gpr = std::get_if<GprNode>(tracked)) {
33 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
34 return nullptr;
35 }
36 // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
37 // register that it uses as operand
38 const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
39 if (!source) {
40 return nullptr;
41 }
42 return TrackCbuf(source, code, new_cursor);
43 }
44 if (const auto operation = std::get_if<OperationNode>(tracked)) {
45 for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) {
46 if (const auto found = TrackCbuf((*operation)[i], code, cursor)) {
47 // Cbuf found in operand
48 return found;
49 }
50 }
51 return nullptr;
52 }
53 return nullptr;
54}
55
56std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const BasicBlock& code,
57 s64 cursor) {
58 for (; cursor >= 0; --cursor) {
59 const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
60 if (!found_node) {
61 return {};
62 }
63 const auto operation = std::get_if<OperationNode>(found_node);
64 ASSERT(operation);
65
66 const auto& target = (*operation)[0];
67 if (const auto gpr_target = std::get_if<GprNode>(target)) {
68 if (gpr_target->GetIndex() == tracked->GetIndex()) {
69 return {(*operation)[1], new_cursor};
70 }
71 }
72 }
73 return {};
74}
75
76} // namespace VideoCommon::Shader