summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/video_core/CMakeLists.txt1
-rw-r--r--src/video_core/engines/shader_bytecode.h8
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.cpp70
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.h18
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp22
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h10
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp18
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp44
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h34
-rw-r--r--src/video_core/shader/decode/memory.cpp49
-rw-r--r--src/video_core/shader/shader_ir.h38
-rw-r--r--src/video_core/shader/track.cpp76
13 files changed, 380 insertions, 14 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 509ca117a..6113e17ff 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -87,6 +87,7 @@ add_library(video_core STATIC
87 shader/decode.cpp 87 shader/decode.cpp
88 shader/shader_ir.cpp 88 shader/shader_ir.cpp
89 shader/shader_ir.h 89 shader/shader_ir.h
90 shader/track.cpp
90 surface.cpp 91 surface.cpp
91 surface.h 92 surface.h
92 textures/astc.cpp 93 textures/astc.cpp
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index cdef97bc6..9989825f8 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -208,6 +208,8 @@ enum class UniformType : u64 {
208 SignedShort = 3, 208 SignedShort = 3,
209 Single = 4, 209 Single = 4,
210 Double = 5, 210 Double = 5,
211 Quad = 6,
212 UnsignedQuad = 7,
211}; 213};
212 214
213enum class StoreType : u64 { 215enum class StoreType : u64 {
@@ -785,6 +787,12 @@ union Instruction {
785 } st_l; 787 } st_l;
786 788
787 union { 789 union {
790 BitField<48, 3, UniformType> type;
791 BitField<46, 2, u64> cache_mode;
792 BitField<20, 24, s64> immediate_offset;
793 } ldg;
794
795 union {
788 BitField<0, 3, u64> pred0; 796 BitField<0, 3, u64> pred0;
789 BitField<3, 3, u64> pred3; 797 BitField<3, 3, u64> pred3;
790 BitField<7, 1, u64> abs_a; 798 BitField<7, 1, u64> abs_a;
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index 7992b82c4..c7f32feaa 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -4,8 +4,13 @@
4 4
5#include <glad/glad.h> 5#include <glad/glad.h>
6 6
7#include "common/assert.h"
8#include "common/logging/log.h"
9#include "core/core.h"
10#include "core/memory.h"
7#include "video_core/renderer_opengl/gl_global_cache.h" 11#include "video_core/renderer_opengl/gl_global_cache.h"
8#include "video_core/renderer_opengl/gl_rasterizer.h" 12#include "video_core/renderer_opengl/gl_rasterizer.h"
13#include "video_core/renderer_opengl/gl_shader_decompiler.h"
9#include "video_core/renderer_opengl/utils.h" 14#include "video_core/renderer_opengl/utils.h"
10 15
11namespace OpenGL { 16namespace OpenGL {
@@ -18,7 +23,72 @@ CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{
18 LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory"); 23 LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory");
19} 24}
20 25
26void CachedGlobalRegion::Reload(u32 size_) {
27 constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize);
28
29 size = size_;
30 if (size > max_size) {
31 size = max_size;
32 LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_,
33 max_size);
34 }
35
36 // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
37 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
38 glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW);
39}
40
41GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
42 const auto search{reserve.find(addr)};
43 if (search == reserve.end()) {
44 return {};
45 }
46 return search->second;
47}
48
49GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) {
50 GlobalRegion region{TryGetReservedGlobalRegion(addr, size)};
51 if (!region) {
52 // No reserved surface available, create a new one and reserve it
53 region = std::make_shared<CachedGlobalRegion>(addr, size);
54 ReserveGlobalRegion(region);
55 }
56 region->Reload(size);
57 return region;
58}
59
60void GlobalRegionCacheOpenGL::ReserveGlobalRegion(const GlobalRegion& region) {
61 reserve[region->GetAddr()] = region;
62}
63
21GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) 64GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
22 : RasterizerCache{rasterizer} {} 65 : RasterizerCache{rasterizer} {}
23 66
67GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
68 const GLShader::GlobalMemoryEntry& global_region,
69 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
70
71 auto& gpu{Core::System::GetInstance().GPU()};
72 const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)];
73 const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress(
74 cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset());
75 ASSERT(cbuf_addr);
76
77 const auto actual_addr_gpu = Memory::Read64(*cbuf_addr);
78 const auto size = Memory::Read32(*cbuf_addr + 8);
79 const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu);
80 ASSERT(actual_addr);
81
82 // Look up global region in the cache based on address
83 GlobalRegion region = TryGet(*actual_addr);
84
85 if (!region) {
86 // No global region found - create a new one
87 region = GetUncachedGlobalRegion(*actual_addr, size);
88 Register(region);
89 }
90
91 return region;
92}
93
24} // namespace OpenGL 94} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index 406a735bc..37830bb7c 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -5,9 +5,13 @@
5#pragma once 5#pragma once
6 6
7#include <memory> 7#include <memory>
8#include <unordered_map>
9
8#include <glad/glad.h> 10#include <glad/glad.h>
9 11
12#include "common/assert.h"
10#include "common/common_types.h" 13#include "common/common_types.h"
14#include "video_core/engines/maxwell_3d.h"
11#include "video_core/rasterizer_cache.h" 15#include "video_core/rasterizer_cache.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 16#include "video_core/renderer_opengl/gl_resource_manager.h"
13 17
@@ -40,6 +44,9 @@ public:
40 return buffer.handle; 44 return buffer.handle;
41 } 45 }
42 46
47 /// Reloads the global region from guest memory
48 void Reload(u32 size_);
49
43 // TODO(Rodrigo): When global memory is written (STG), implement flushing 50 // TODO(Rodrigo): When global memory is written (STG), implement flushing
44 void Flush() override { 51 void Flush() override {
45 UNIMPLEMENTED(); 52 UNIMPLEMENTED();
@@ -55,6 +62,17 @@ private:
55class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> { 62class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
56public: 63public:
57 explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer); 64 explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer);
65
66 /// Gets the global memory region referenced by the given descriptor for the specified shader stage
67 GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
68 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
69
70private:
71 GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;
72 GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size);
73 void ReserveGlobalRegion(const GlobalRegion& region);
74
75 std::unordered_map<VAddr, GlobalRegion> reserve;
58}; 76};
59 77
60} // namespace OpenGL 78} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 71829fee0..ca421ef28 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -300,6 +300,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
300 // Next available bindpoints to use when uploading the const buffers and textures to the GLSL 300 // Next available bindpoints to use when uploading the const buffers and textures to the GLSL
301 // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. 301 // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
302 u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; 302 u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
303 u32 current_gmem_bindpoint = 0;
303 u32 current_texture_bindpoint = 0; 304 u32 current_texture_bindpoint = 0;
304 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 305 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
305 306
@@ -358,6 +359,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
358 SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode, 359 SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode,
359 current_constbuffer_bindpoint); 360 current_constbuffer_bindpoint);
360 361
362 // Configure global memory regions for this shader stage.
363 current_gmem_bindpoint = SetupGlobalRegions(static_cast<Maxwell::ShaderStage>(stage),
364 shader, primitive_mode, current_gmem_bindpoint);
365
361 // Configure the textures for this shader stage. 366 // Configure the textures for this shader stage.
362 current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader, 367 current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
363 primitive_mode, current_texture_bindpoint); 368 primitive_mode, current_texture_bindpoint);
@@ -993,6 +998,23 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad
993 return current_bindpoint + static_cast<u32>(entries.size()); 998 return current_bindpoint + static_cast<u32>(entries.size());
994} 999}
995 1000
1001u32 RasterizerOpenGL::SetupGlobalRegions(Maxwell::ShaderStage stage, Shader& shader,
1002 GLenum primitive_mode, u32 current_bindpoint) {
1003 for (const auto& global_region : shader->GetShaderEntries().global_memory_entries) {
1004 const auto& region =
1005 global_cache.GetGlobalRegion(global_region, static_cast<Maxwell::ShaderStage>(stage));
1006 const GLuint block_index{shader->GetProgramResourceIndex(global_region)};
1007 ASSERT(block_index != GL_INVALID_INDEX);
1008
1009 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, current_bindpoint, region->GetBufferHandle());
1010 glShaderStorageBlockBinding(shader->GetProgramHandle(primitive_mode), block_index,
1011 current_bindpoint);
1012 ++current_bindpoint;
1013 }
1014
1015 return current_bindpoint;
1016}
1017
996u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, 1018u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
997 GLenum primitive_mode, u32 current_unit) { 1019 GLenum primitive_mode, u32 current_unit) {
998 MICROPROFILE_SCOPE(OpenGL_Texture); 1020 MICROPROFILE_SCOPE(OpenGL_Texture);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 21c51f874..57ab2f627 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -138,6 +138,16 @@ private:
138 GLenum primitive_mode, u32 current_bindpoint); 138 GLenum primitive_mode, u32 current_bindpoint);
139 139
140 /** 140 /**
141 * Configures the current global memory regions to use for the draw command.
142 * @param stage The shader stage to configure buffers for.
143 * @param shader The shader object that contains the specified stage.
144 * @param current_bindpoint The offset at which to start counting new buffer bindpoints.
145 * @returns The next available bindpoint for use in the next shader stage.
146 */
147 u32 SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
148 GLenum primitive_mode, u32 current_bindpoint);
149
150 /**
141 * Configures the current textures to use for the draw command. 151 * Configures the current textures to use for the draw command.
142 * @param stage The shader stage to configure textures for. 152 * @param stage The shader stage to configure textures for.
143 * @param shader The shader object that contains the specified stage. 153 * @param shader The shader object that contains the specified stage.
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index b3aca39af..54ec23f3a 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -108,11 +108,23 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
108} 108}
109 109
110GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) { 110GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) {
111 const auto search{resource_cache.find(buffer.GetHash())}; 111 const auto search{cbuf_resource_cache.find(buffer.GetHash())};
112 if (search == resource_cache.end()) { 112 if (search == cbuf_resource_cache.end()) {
113 const GLuint index{ 113 const GLuint index{
114 glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())}; 114 glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())};
115 resource_cache[buffer.GetHash()] = index; 115 cbuf_resource_cache[buffer.GetHash()] = index;
116 return index;
117 }
118
119 return search->second;
120}
121
122GLuint CachedShader::GetProgramResourceIndex(const GLShader::GlobalMemoryEntry& global_mem) {
123 const auto search{gmem_resource_cache.find(global_mem.GetHash())};
124 if (search == gmem_resource_cache.end()) {
125 const GLuint index{glGetProgramResourceIndex(program.handle, GL_SHADER_STORAGE_BLOCK,
126 global_mem.GetName().c_str())};
127 gmem_resource_cache[global_mem.GetHash()] = index;
116 return index; 128 return index;
117 } 129 }
118 130
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index e0887dd7b..62b1733b4 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -76,6 +76,9 @@ public:
76 /// Gets the GL program resource location for the specified resource, caching as needed 76 /// Gets the GL program resource location for the specified resource, caching as needed
77 GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer); 77 GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer);
78 78
79 /// Gets the GL program resource location for the specified resource, caching as needed
80 GLuint GetProgramResourceIndex(const GLShader::GlobalMemoryEntry& global_mem);
81
79 /// Gets the GL uniform location for the specified resource, caching as needed 82 /// Gets the GL uniform location for the specified resource, caching as needed
80 GLint GetUniformLocation(const GLShader::SamplerEntry& sampler); 83 GLint GetUniformLocation(const GLShader::SamplerEntry& sampler);
81 84
@@ -107,7 +110,8 @@ private:
107 OGLProgram triangles_adjacency; 110 OGLProgram triangles_adjacency;
108 } geometry_programs; 111 } geometry_programs;
109 112
110 std::map<u32, GLuint> resource_cache; 113 std::map<u32, GLuint> cbuf_resource_cache;
114 std::map<u32, GLuint> gmem_resource_cache;
111 std::map<u32, GLint> uniform_cache; 115 std::map<u32, GLint> uniform_cache;
112}; 116};
113 117
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 3411cf9e6..e072216f0 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -34,6 +34,8 @@ using Operation = const OperationNode&;
34enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; 34enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
35constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 35constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
36 static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); 36 static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
37constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
38 static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
37 39
38enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; 40enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
39 41
@@ -143,6 +145,7 @@ public:
143 DeclareInputAttributes(); 145 DeclareInputAttributes();
144 DeclareOutputAttributes(); 146 DeclareOutputAttributes();
145 DeclareConstantBuffers(); 147 DeclareConstantBuffers();
148 DeclareGlobalMemory();
146 DeclareSamplers(); 149 DeclareSamplers();
147 150
148 code.AddLine("void execute_" + suffix + "() {"); 151 code.AddLine("void execute_" + suffix + "() {");
@@ -190,12 +193,15 @@ public:
190 ShaderEntries GetShaderEntries() const { 193 ShaderEntries GetShaderEntries() const {
191 ShaderEntries entries; 194 ShaderEntries entries;
192 for (const auto& cbuf : ir.GetConstantBuffers()) { 195 for (const auto& cbuf : ir.GetConstantBuffers()) {
193 ConstBufferEntry desc(cbuf.second, stage, GetConstBufferBlock(cbuf.first), cbuf.first); 196 entries.const_buffers.emplace_back(cbuf.second, stage, GetConstBufferBlock(cbuf.first),
194 entries.const_buffers.push_back(desc); 197 cbuf.first);
195 } 198 }
196 for (const auto& sampler : ir.GetSamplers()) { 199 for (const auto& sampler : ir.GetSamplers()) {
197 SamplerEntry desc(sampler, stage, GetSampler(sampler)); 200 entries.samplers.emplace_back(sampler, stage, GetSampler(sampler));
198 entries.samplers.push_back(desc); 201 }
202 for (const auto& gmem : ir.GetGlobalMemoryBases()) {
203 entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset, stage,
204 GetGlobalMemoryBlock(gmem));
199 } 205 }
200 entries.clip_distances = ir.GetClipDistances(); 206 entries.clip_distances = ir.GetClipDistances();
201 entries.shader_length = ir.GetLength(); 207 entries.shader_length = ir.GetLength();
@@ -375,6 +381,15 @@ private:
375 } 381 }
376 } 382 }
377 383
384 void DeclareGlobalMemory() {
385 for (const auto& entry : ir.GetGlobalMemoryBases()) {
386 code.AddLine("layout (std430) buffer " + GetGlobalMemoryBlock(entry) + " {");
387 code.AddLine(" float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];");
388 code.AddLine("};");
389 code.AddNewLine();
390 }
391 }
392
378 void DeclareSamplers() { 393 void DeclareSamplers() {
379 const auto& samplers = ir.GetSamplers(); 394 const auto& samplers = ir.GetSamplers();
380 for (const auto& sampler : samplers) { 395 for (const auto& sampler : samplers) {
@@ -538,6 +553,12 @@ private:
538 UNREACHABLE_MSG("Unmanaged offset node type"); 553 UNREACHABLE_MSG("Unmanaged offset node type");
539 } 554 }
540 555
556 } else if (const auto gmem = std::get_if<GmemNode>(node)) {
557 const std::string real = Visit(gmem->GetRealAddress());
558 const std::string base = Visit(gmem->GetBaseAddress());
559 const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4";
560 return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
561
541 } else if (const auto lmem = std::get_if<LmemNode>(node)) { 562 } else if (const auto lmem = std::get_if<LmemNode>(node)) {
542 return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); 563 return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
543 564
@@ -1471,6 +1492,15 @@ private:
1471 return GetDeclarationWithSuffix(index, "cbuf"); 1492 return GetDeclarationWithSuffix(index, "cbuf");
1472 } 1493 }
1473 1494
1495 std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
1496 return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix);
1497 }
1498
1499 std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const {
1500 return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset,
1501 suffix);
1502 }
1503
1474 std::string GetConstBufferBlock(u32 index) const { 1504 std::string GetConstBufferBlock(u32 index) const {
1475 return GetDeclarationWithSuffix(index, "cbuf_block"); 1505 return GetDeclarationWithSuffix(index, "cbuf_block");
1476 } 1506 }
@@ -1505,8 +1535,10 @@ private:
1505}; 1535};
1506 1536
1507std::string GetCommonDeclarations() { 1537std::string GetCommonDeclarations() {
1508 return "#define MAX_CONSTBUFFER_ELEMENTS " + std::to_string(MAX_CONSTBUFFER_ELEMENTS) + 1538 const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
1509 "\n" 1539 const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
1540 return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" +
1541 "#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" +
1510 "#define ftoi floatBitsToInt\n" 1542 "#define ftoi floatBitsToInt\n"
1511 "#define ftou floatBitsToUint\n" 1543 "#define ftou floatBitsToUint\n"
1512 "#define itof intBitsToFloat\n" 1544 "#define itof intBitsToFloat\n"
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 396a560d8..e47bc3729 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -71,9 +71,43 @@ private:
71 Maxwell::ShaderStage stage{}; 71 Maxwell::ShaderStage stage{};
72}; 72};
73 73
74class GlobalMemoryEntry {
75public:
76 explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, Maxwell::ShaderStage stage,
77 std::string name)
78 : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, stage{stage}, name{std::move(name)} {}
79
80 u32 GetCbufIndex() const {
81 return cbuf_index;
82 }
83
84 u32 GetCbufOffset() const {
85 return cbuf_offset;
86 }
87
88 const std::string& GetName() const {
89 return name;
90 }
91
92 Maxwell::ShaderStage GetStage() const {
93 return stage;
94 }
95
96 u32 GetHash() const {
97 return (static_cast<u32>(stage) << 24) | (cbuf_index << 16) | cbuf_offset;
98 }
99
100private:
101 u32 cbuf_index{};
102 u32 cbuf_offset{};
103 Maxwell::ShaderStage stage{};
104 std::string name;
105};
106
74struct ShaderEntries { 107struct ShaderEntries {
75 std::vector<ConstBufferEntry> const_buffers; 108 std::vector<ConstBufferEntry> const_buffers;
76 std::vector<SamplerEntry> samplers; 109 std::vector<SamplerEntry> samplers;
110 std::vector<GlobalMemoryEntry> global_memory_entries;
77 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 111 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
78 std::size_t shader_length{}; 112 std::size_t shader_length{};
79}; 113};
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index ae71672d6..04cb386b7 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -4,6 +4,7 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <vector> 6#include <vector>
7#include <fmt/format.h>
7 8
8#include "common/assert.h" 9#include "common/assert.h"
9#include "common/common_types.h" 10#include "common/common_types.h"
@@ -119,6 +120,54 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
119 } 120 }
120 break; 121 break;
121 } 122 }
123 case OpCode::Id::LDG: {
124 const u32 count = [&]() {
125 switch (instr.ldg.type) {
126 case Tegra::Shader::UniformType::Single:
127 return 1;
128 case Tegra::Shader::UniformType::Double:
129 return 2;
130 case Tegra::Shader::UniformType::Quad:
131 case Tegra::Shader::UniformType::UnsignedQuad:
132 return 4;
133 default:
134 UNIMPLEMENTED_MSG("Unimplemented LDG size!");
135 return 1;
136 }
137 }();
138
139 const Node addr_register = GetRegister(instr.gpr8);
140 const Node base_address = TrackCbuf(addr_register, code, static_cast<s64>(code.size()));
141 const auto cbuf = std::get_if<CbufNode>(base_address);
142 ASSERT(cbuf != nullptr);
143 const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
144 ASSERT(cbuf_offset_imm != nullptr);
145 const auto cbuf_offset = cbuf_offset_imm->GetValue() * 4;
146
147 bb.push_back(Comment(
148 fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
149
150 const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
151 used_global_memory_bases.insert(descriptor);
152
153 const Node immediate_offset =
154 Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value()));
155 const Node base_real_address =
156 Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register);
157
158 for (u32 i = 0; i < count; ++i) {
159 const Node it_offset = Immediate(i * 4);
160 const Node real_address =
161 Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset);
162 const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
163
164 SetTemporal(bb, i, gmem);
165 }
166 for (u32 i = 0; i < count; ++i) {
167 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
168 }
169 break;
170 }
122 case OpCode::Id::ST_A: { 171 case OpCode::Id::ST_A: {
123 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, 172 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
124 "Indirect attribute loads are not supported"); 173 "Indirect attribute loads are not supported");
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index ef8f94480..c4ecb2e3c 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -257,6 +257,15 @@ private:
257 bool is_indirect{}; 257 bool is_indirect{};
258}; 258};
259 259
260struct GlobalMemoryBase {
261 u32 cbuf_index{};
262 u32 cbuf_offset{};
263
264 bool operator<(const GlobalMemoryBase& rhs) const {
265 return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset);
266 }
267};
268
260struct MetaArithmetic { 269struct MetaArithmetic {
261 bool precise{}; 270 bool precise{};
262}; 271};
@@ -478,14 +487,26 @@ private:
478/// Global memory node 487/// Global memory node
479class GmemNode final { 488class GmemNode final {
480public: 489public:
481 explicit constexpr GmemNode(Node address) : address{address} {} 490 explicit constexpr GmemNode(Node real_address, Node base_address,
491 const GlobalMemoryBase& descriptor)
492 : real_address{real_address}, base_address{base_address}, descriptor{descriptor} {}
482 493
483 Node GetAddress() const { 494 Node GetRealAddress() const {
484 return address; 495 return real_address;
496 }
497
498 Node GetBaseAddress() const {
499 return base_address;
500 }
501
502 const GlobalMemoryBase& GetDescriptor() const {
503 return descriptor;
485 } 504 }
486 505
487private: 506private:
488 const Node address; 507 const Node real_address;
508 const Node base_address;
509 const GlobalMemoryBase descriptor;
489}; 510};
490 511
491/// Commentary, can be dropped 512/// Commentary, can be dropped
@@ -543,6 +564,10 @@ public:
543 return used_clip_distances; 564 return used_clip_distances;
544 } 565 }
545 566
567 const std::set<GlobalMemoryBase>& GetGlobalMemoryBases() const {
568 return used_global_memory_bases;
569 }
570
546 std::size_t GetLength() const { 571 std::size_t GetLength() const {
547 return static_cast<std::size_t>(coverage_end * sizeof(u64)); 572 return static_cast<std::size_t>(coverage_end * sizeof(u64));
548 } 573 }
@@ -734,6 +759,10 @@ private:
734 void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, 759 void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
735 Node op_c, Node imm_lut, bool sets_cc); 760 Node op_c, Node imm_lut, bool sets_cc);
736 761
762 Node TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor);
763
764 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const BasicBlock& code, s64 cursor);
765
737 template <typename... T> 766 template <typename... T>
738 Node Operation(OperationCode code, const T*... operands) { 767 Node Operation(OperationCode code, const T*... operands) {
739 return StoreNode(OperationNode(code, operands...)); 768 return StoreNode(OperationNode(code, operands...));
@@ -786,6 +815,7 @@ private:
786 std::map<u32, ConstBuffer> used_cbufs; 815 std::map<u32, ConstBuffer> used_cbufs;
787 std::set<Sampler> used_samplers; 816 std::set<Sampler> used_samplers;
788 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; 817 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
818 std::set<GlobalMemoryBase> used_global_memory_bases;
789 819
790 Tegra::Shader::Header header; 820 Tegra::Shader::Header header;
791}; 821};
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
new file mode 100644
index 000000000..d6d29ee9f
--- /dev/null
+++ b/src/video_core/shader/track.cpp
@@ -0,0 +1,76 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <utility>
7#include <variant>
8
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13namespace {
14std::pair<Node, s64> FindOperation(const BasicBlock& code, s64 cursor,
15 OperationCode operation_code) {
16 for (; cursor >= 0; --cursor) {
17 const Node node = code[cursor];
18 if (const auto operation = std::get_if<OperationNode>(node)) {
19 if (operation->GetCode() == operation_code)
20 return {node, cursor};
21 }
22 }
23 return {};
24}
25} // namespace
26
27Node ShaderIR::TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor) {
28 if (const auto cbuf = std::get_if<CbufNode>(tracked)) {
29 // Cbuf found, but it has to be immediate
30 return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr;
31 }
32 if (const auto gpr = std::get_if<GprNode>(tracked)) {
33 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
34 return nullptr;
35 }
36 // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
37 // register that it uses as operand
38 const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
39 if (!source) {
40 return nullptr;
41 }
42 return TrackCbuf(source, code, new_cursor);
43 }
44 if (const auto operation = std::get_if<OperationNode>(tracked)) {
45 for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) {
46 if (const auto found = TrackCbuf((*operation)[i], code, cursor)) {
47 // Cbuf found in operand
48 return found;
49 }
50 }
51 return nullptr;
52 }
53 return nullptr;
54}
55
56std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const BasicBlock& code,
57 s64 cursor) {
58 for (; cursor >= 0; --cursor) {
59 const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
60 if (!found_node) {
61 return {};
62 }
63 const auto operation = std::get_if<OperationNode>(found_node);
64 ASSERT(operation);
65
66 const auto& target = (*operation)[0];
67 if (const auto gpr_target = std::get_if<GprNode>(target)) {
68 if (gpr_target->GetIndex() == tracked->GetIndex()) {
69 return {(*operation)[1], new_cursor};
70 }
71 }
72 }
73 return {};
74}
75
76} // namespace VideoCommon::Shader