summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/video_core/engines/shader_bytecode.h6
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.cpp42
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.h16
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp36
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h15
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp13
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp14
-rw-r--r--src/video_core/shader/decode/memory.cpp109
-rw-r--r--src/video_core/shader/shader_ir.h16
11 files changed, 186 insertions, 89 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 2e1e96c81..acf475289 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -792,6 +792,12 @@ union Instruction {
792 } ldg; 792 } ldg;
793 793
794 union { 794 union {
795 BitField<48, 3, UniformType> type;
796 BitField<46, 2, u64> cache_mode;
797 BitField<20, 24, s64> immediate_offset;
798 } stg;
799
800 union {
795 BitField<0, 3, u64> pred0; 801 BitField<0, 3, u64> pred0;
796 BitField<3, 3, u64> pred3; 802 BitField<3, 3, u64> pred3;
797 BitField<7, 1, u64> abs_a; 803 BitField<7, 1, u64> abs_a;
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index 8d9ee81f1..ea4a593af 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -14,28 +14,28 @@
14 14
15namespace OpenGL { 15namespace OpenGL {
16 16
17CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr) 17CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size)
18 : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size} { 18 : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size},
19 max_size{max_size} {
19 buffer.Create(); 20 buffer.Create();
20 // Bind and unbind the buffer so it gets allocated by the driver
21 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
22 glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
23 LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory"); 21 LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
24} 22}
25 23
26void CachedGlobalRegion::Reload(u32 size_) { 24CachedGlobalRegion::~CachedGlobalRegion() = default;
27 constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize);
28 25
26void CachedGlobalRegion::Reload(u32 size_) {
29 size = size_; 27 size = size_;
30 if (size > max_size) { 28 if (size > max_size) {
31 size = max_size; 29 size = max_size;
32 LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_, 30 LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_,
33 max_size); 31 max_size);
34 } 32 }
33 glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW);
34}
35 35
36 // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer 36void CachedGlobalRegion::Flush() {
37 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); 37 LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:16}", size, cpu_addr);
38 glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW); 38 glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr);
39} 39}
40 40
41GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const { 41GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
@@ -46,14 +46,16 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr,
46 return search->second; 46 return search->second;
47} 47}
48 48
49GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u32 size, 49GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr,
50 u8* host_ptr) { 50 u32 size) {
51 GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)}; 51 GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
52 if (!region) { 52 if (!region) {
53 // No reserved surface available, create a new one and reserve it 53 // No reserved surface available, create a new one and reserve it
54 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; 54 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
55 const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr); 55 const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)};
56 region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr); 56 ASSERT(cpu_addr);
57
58 region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size);
57 ReserveGlobalRegion(region); 59 ReserveGlobalRegion(region);
58 } 60 }
59 region->Reload(size); 61 region->Reload(size);
@@ -65,7 +67,11 @@ void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
65} 67}
66 68
67GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) 69GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
68 : RasterizerCache{rasterizer} {} 70 : RasterizerCache{rasterizer} {
71 GLint max_ssbo_size_;
72 glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_);
73 max_ssbo_size = static_cast<u32>(max_ssbo_size_);
74}
69 75
70GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( 76GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
71 const GLShader::GlobalMemoryEntry& global_region, 77 const GLShader::GlobalMemoryEntry& global_region,
@@ -73,7 +79,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
73 79
74 auto& gpu{Core::System::GetInstance().GPU()}; 80 auto& gpu{Core::System::GetInstance().GPU()};
75 auto& memory_manager{gpu.MemoryManager()}; 81 auto& memory_manager{gpu.MemoryManager()};
76 const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]}; 82 const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
77 const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address + 83 const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
78 global_region.GetCbufOffset()}; 84 global_region.GetCbufOffset()};
79 const auto actual_addr{memory_manager.Read<u64>(addr)}; 85 const auto actual_addr{memory_manager.Read<u64>(addr)};
@@ -85,7 +91,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
85 91
86 if (!region) { 92 if (!region) {
87 // No global region found - create a new one 93 // No global region found - create a new one
88 region = GetUncachedGlobalRegion(actual_addr, size, host_ptr); 94 region = GetUncachedGlobalRegion(actual_addr, host_ptr, size);
89 Register(region); 95 Register(region);
90 } 96 }
91 97
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index 5a21ab66f..196e6e278 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -19,7 +19,7 @@ namespace OpenGL {
19 19
20namespace GLShader { 20namespace GLShader {
21class GlobalMemoryEntry; 21class GlobalMemoryEntry;
22} // namespace GLShader 22}
23 23
24class RasterizerOpenGL; 24class RasterizerOpenGL;
25class CachedGlobalRegion; 25class CachedGlobalRegion;
@@ -27,7 +27,8 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
27 27
28class CachedGlobalRegion final : public RasterizerCacheObject { 28class CachedGlobalRegion final : public RasterizerCacheObject {
29public: 29public:
30 explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr); 30 explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size);
31 ~CachedGlobalRegion();
31 32
32 VAddr GetCpuAddr() const override { 33 VAddr GetCpuAddr() const override {
33 return cpu_addr; 34 return cpu_addr;
@@ -45,14 +46,14 @@ public:
45 /// Reloads the global region from guest memory 46 /// Reloads the global region from guest memory
46 void Reload(u32 size_); 47 void Reload(u32 size_);
47 48
48 // TODO(Rodrigo): When global memory is written (STG), implement flushing 49 void Flush() override;
49 void Flush() override {
50 UNIMPLEMENTED();
51 }
52 50
53private: 51private:
54 VAddr cpu_addr{}; 52 VAddr cpu_addr{};
53 u8* host_ptr{};
55 u32 size{}; 54 u32 size{};
55 u32 max_size{};
56
56 OGLBuffer buffer; 57 OGLBuffer buffer;
57}; 58};
58 59
@@ -66,10 +67,11 @@ public:
66 67
67private: 68private:
68 GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const; 69 GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
69 GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u32 size, u8* host_ptr); 70 GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size);
70 void ReserveGlobalRegion(GlobalRegion region); 71 void ReserveGlobalRegion(GlobalRegion region);
71 72
72 std::unordered_map<CacheAddr, GlobalRegion> reserve; 73 std::unordered_map<CacheAddr, GlobalRegion> reserve;
74 u32 max_ssbo_size{};
73}; 75};
74 76
75} // namespace OpenGL 77} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index d250d5cbb..ea42fd060 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -756,6 +756,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
756 return; 756 return;
757 } 757 }
758 res_cache.FlushRegion(addr, size); 758 res_cache.FlushRegion(addr, size);
759 global_cache.FlushRegion(addr, size);
759} 760}
760 761
761void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { 762void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
@@ -953,6 +954,9 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade
953 for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { 954 for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
954 const auto& entry{entries[bindpoint]}; 955 const auto& entry{entries[bindpoint]};
955 const auto& region{global_cache.GetGlobalRegion(entry, stage)}; 956 const auto& region{global_cache.GetGlobalRegion(entry, stage)};
957 if (entry.IsWritten()) {
958 region->MarkAsModified(true, global_cache);
959 }
956 bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0, 960 bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0,
957 static_cast<GLsizeiptr>(region->GetSizeInBytes())); 961 static_cast<GLsizeiptr>(region->GetSizeInBytes()));
958 } 962 }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index e4c64ae71..d4c2cf80e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -71,10 +71,6 @@ public:
71 static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, 71 static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
72 "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); 72 "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
73 73
74 static constexpr std::size_t MaxGlobalMemorySize = 0x10000;
75 static_assert(MaxGlobalMemorySize % sizeof(float) == 0,
76 "The maximum size of a global memory must be a multiple of the size of float");
77
78private: 74private:
79 class SamplerInfo { 75 class SamplerInfo {
80 public: 76 public:
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 28e490b3c..445048daf 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -45,8 +45,6 @@ using TextureIR = std::variant<TextureAoffi, TextureArgument>;
45enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; 45enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
46constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 46constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
47 static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); 47 static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
48constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
49 static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
50 48
51class ShaderWriter { 49class ShaderWriter {
52public: 50public:
@@ -208,8 +206,10 @@ public:
208 for (const auto& sampler : ir.GetSamplers()) { 206 for (const auto& sampler : ir.GetSamplers()) {
209 entries.samplers.emplace_back(sampler); 207 entries.samplers.emplace_back(sampler);
210 } 208 }
211 for (const auto& gmem : ir.GetGlobalMemoryBases()) { 209 for (const auto& gmem_pair : ir.GetGlobalMemory()) {
212 entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset); 210 const auto& [base, usage] = gmem_pair;
211 entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset,
212 usage.is_read, usage.is_written);
213 } 213 }
214 entries.clip_distances = ir.GetClipDistances(); 214 entries.clip_distances = ir.GetClipDistances();
215 entries.shader_length = ir.GetLength(); 215 entries.shader_length = ir.GetLength();
@@ -380,12 +380,22 @@ private:
380 } 380 }
381 381
382 void DeclareGlobalMemory() { 382 void DeclareGlobalMemory() {
383 for (const auto& entry : ir.GetGlobalMemoryBases()) { 383 for (const auto& gmem : ir.GetGlobalMemory()) {
384 const auto& [base, usage] = gmem;
385
386 // Since we don't know how the shader will use the shader, hint the driver to disable as
387 // much optimizations as possible
388 std::string qualifier = "coherent volatile";
389 if (usage.is_read && !usage.is_written)
390 qualifier += " readonly";
391 else if (usage.is_written && !usage.is_read)
392 qualifier += " writeonly";
393
384 const std::string binding = 394 const std::string binding =
385 fmt::format("GMEM_BINDING_{}_{}", entry.cbuf_index, entry.cbuf_offset); 395 fmt::format("GMEM_BINDING_{}_{}", base.cbuf_index, base.cbuf_offset);
386 code.AddLine("layout (std430, binding = " + binding + ") buffer " + 396 code.AddLine("layout (std430, binding = " + binding + ") " + qualifier + " buffer " +
387 GetGlobalMemoryBlock(entry) + " {"); 397 GetGlobalMemoryBlock(base) + " {");
388 code.AddLine(" float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];"); 398 code.AddLine(" float " + GetGlobalMemory(base) + "[];");
389 code.AddLine("};"); 399 code.AddLine("};");
390 code.AddNewLine(); 400 code.AddNewLine();
391 } 401 }
@@ -868,6 +878,12 @@ private:
868 } else if (const auto lmem = std::get_if<LmemNode>(dest)) { 878 } else if (const auto lmem = std::get_if<LmemNode>(dest)) {
869 target = GetLocalMemory() + "[ftou(" + Visit(lmem->GetAddress()) + ") / 4]"; 879 target = GetLocalMemory() + "[ftou(" + Visit(lmem->GetAddress()) + ") / 4]";
870 880
881 } else if (const auto gmem = std::get_if<GmemNode>(dest)) {
882 const std::string real = Visit(gmem->GetRealAddress());
883 const std::string base = Visit(gmem->GetBaseAddress());
884 const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4";
885 target = fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
886
871 } else { 887 } else {
872 UNREACHABLE_MSG("Assign called without a proper target"); 888 UNREACHABLE_MSG("Assign called without a proper target");
873 } 889 }
@@ -1621,9 +1637,7 @@ private:
1621 1637
1622std::string GetCommonDeclarations() { 1638std::string GetCommonDeclarations() {
1623 const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); 1639 const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
1624 const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
1625 return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" + 1640 return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" +
1626 "#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" +
1627 "#define ftoi floatBitsToInt\n" 1641 "#define ftoi floatBitsToInt\n"
1628 "#define ftou floatBitsToUint\n" 1642 "#define ftou floatBitsToUint\n"
1629 "#define itof intBitsToFloat\n" 1643 "#define itof intBitsToFloat\n"
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 4e04ab2f8..55b3d4d7b 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -39,8 +39,9 @@ private:
39 39
40class GlobalMemoryEntry { 40class GlobalMemoryEntry {
41public: 41public:
42 explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset) 42 explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, bool is_read, bool is_written)
43 : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {} 43 : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, is_read{is_read}, is_written{
44 is_written} {}
44 45
45 u32 GetCbufIndex() const { 46 u32 GetCbufIndex() const {
46 return cbuf_index; 47 return cbuf_index;
@@ -50,9 +51,19 @@ public:
50 return cbuf_offset; 51 return cbuf_offset;
51 } 52 }
52 53
54 bool IsRead() const {
55 return is_read;
56 }
57
58 bool IsWritten() const {
59 return is_written;
60 }
61
53private: 62private:
54 u32 cbuf_index{}; 63 u32 cbuf_index{};
55 u32 cbuf_offset{}; 64 u32 cbuf_offset{};
65 bool is_read{};
66 bool is_written{};
56}; 67};
57 68
58struct ShaderEntries { 69struct ShaderEntries {
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 8a43eb157..d5890a375 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -337,11 +337,16 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
337 for (u32 i = 0; i < global_memory_count; ++i) { 337 for (u32 i = 0; i < global_memory_count; ++i) {
338 u32 cbuf_index{}; 338 u32 cbuf_index{};
339 u32 cbuf_offset{}; 339 u32 cbuf_offset{};
340 u8 is_read{};
341 u8 is_written{};
340 if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) || 342 if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) ||
341 file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32)) { 343 file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32) ||
344 file.ReadBytes(&is_read, sizeof(u8)) != sizeof(u8) ||
345 file.ReadBytes(&is_written, sizeof(u8)) != sizeof(u8)) {
342 return {}; 346 return {};
343 } 347 }
344 entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset); 348 entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read != 0,
349 is_written != 0);
345 } 350 }
346 351
347 for (auto& clip_distance : entry.entries.clip_distances) { 352 for (auto& clip_distance : entry.entries.clip_distances) {
@@ -397,7 +402,9 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 uniqu
397 return false; 402 return false;
398 for (const auto& gmem : entries.global_memory_entries) { 403 for (const auto& gmem : entries.global_memory_entries) {
399 if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 || 404 if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 ||
400 file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1) { 405 file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1 ||
406 file.WriteObject(static_cast<u8>(gmem.IsRead() ? 1 : 0)) != 1 ||
407 file.WriteObject(static_cast<u8>(gmem.IsWritten() ? 1 : 0)) != 1) {
401 return false; 408 return false;
402 } 409 }
403 } 410 }
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index e0a6f5e87..25500f9a3 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -191,8 +191,9 @@ public:
191 for (const auto& cbuf : ir.GetConstantBuffers()) { 191 for (const auto& cbuf : ir.GetConstantBuffers()) {
192 entries.const_buffers.emplace_back(cbuf.second, cbuf.first); 192 entries.const_buffers.emplace_back(cbuf.second, cbuf.first);
193 } 193 }
194 for (const auto& gmem : ir.GetGlobalMemoryBases()) { 194 for (const auto& gmem_pair : ir.GetGlobalMemory()) {
195 entries.global_buffers.emplace_back(gmem.cbuf_index, gmem.cbuf_offset); 195 const auto& [base, usage] = gmem_pair;
196 entries.global_buffers.emplace_back(base.cbuf_index, base.cbuf_offset);
196 } 197 }
197 for (const auto& sampler : ir.GetSamplers()) { 198 for (const auto& sampler : ir.GetSamplers()) {
198 entries.samplers.emplace_back(sampler); 199 entries.samplers.emplace_back(sampler);
@@ -225,7 +226,7 @@ private:
225 return current_binding; 226 return current_binding;
226 }; 227 };
227 const_buffers_base_binding = Allocate(ir.GetConstantBuffers().size()); 228 const_buffers_base_binding = Allocate(ir.GetConstantBuffers().size());
228 global_buffers_base_binding = Allocate(ir.GetGlobalMemoryBases().size()); 229 global_buffers_base_binding = Allocate(ir.GetGlobalMemory().size());
229 samplers_base_binding = Allocate(ir.GetSamplers().size()); 230 samplers_base_binding = Allocate(ir.GetSamplers().size());
230 231
231 ASSERT_MSG(binding_iterator - binding_base < STAGE_BINDING_STRIDE, 232 ASSERT_MSG(binding_iterator - binding_base < STAGE_BINDING_STRIDE,
@@ -390,14 +391,15 @@ private:
390 391
391 void DeclareGlobalBuffers() { 392 void DeclareGlobalBuffers() {
392 u32 binding = global_buffers_base_binding; 393 u32 binding = global_buffers_base_binding;
393 for (const auto& entry : ir.GetGlobalMemoryBases()) { 394 for (const auto& entry : ir.GetGlobalMemory()) {
395 const auto [base, usage] = entry;
394 const Id id = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer); 396 const Id id = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer);
395 AddGlobalVariable( 397 AddGlobalVariable(
396 Name(id, fmt::format("gmem_{}_{}", entry.cbuf_index, entry.cbuf_offset))); 398 Name(id, fmt::format("gmem_{}_{}", base.cbuf_index, base.cbuf_offset)));
397 399
398 Decorate(id, spv::Decoration::Binding, binding++); 400 Decorate(id, spv::Decoration::Binding, binding++);
399 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); 401 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
400 global_buffers.emplace(entry, id); 402 global_buffers.emplace(base, id);
401 } 403 }
402 } 404 }
403 405
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index ea3c71eed..ff19ada55 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -18,6 +18,23 @@ using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode; 18using Tegra::Shader::OpCode;
19using Tegra::Shader::Register; 19using Tegra::Shader::Register;
20 20
21namespace {
22u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
23 switch (uniform_type) {
24 case Tegra::Shader::UniformType::Single:
25 return 1;
26 case Tegra::Shader::UniformType::Double:
27 return 2;
28 case Tegra::Shader::UniformType::Quad:
29 case Tegra::Shader::UniformType::UnsignedQuad:
30 return 4;
31 default:
32 UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
33 return 1;
34 }
35}
36} // namespace
37
21u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { 38u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
22 const Instruction instr = {program_code[pc]}; 39 const Instruction instr = {program_code[pc]};
23 const auto opcode = OpCode::Decode(instr); 40 const auto opcode = OpCode::Decode(instr);
@@ -126,45 +143,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
126 break; 143 break;
127 } 144 }
128 case OpCode::Id::LDG: { 145 case OpCode::Id::LDG: {
129 const u32 count = [&]() { 146 const auto [real_address_base, base_address, descriptor] =
130 switch (instr.ldg.type) { 147 TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
131 case Tegra::Shader::UniformType::Single: 148 static_cast<u32>(instr.ldg.immediate_offset.Value()), false);
132 return 1;
133 case Tegra::Shader::UniformType::Double:
134 return 2;
135 case Tegra::Shader::UniformType::Quad:
136 case Tegra::Shader::UniformType::UnsignedQuad:
137 return 4;
138 default:
139 UNIMPLEMENTED_MSG("Unimplemented LDG size!");
140 return 1;
141 }
142 }();
143
144 const Node addr_register = GetRegister(instr.gpr8);
145 const Node base_address =
146 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
147 const auto cbuf = std::get_if<CbufNode>(base_address);
148 ASSERT(cbuf != nullptr);
149 const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
150 ASSERT(cbuf_offset_imm != nullptr);
151 const auto cbuf_offset = cbuf_offset_imm->GetValue();
152
153 bb.push_back(Comment(
154 fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
155
156 const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
157 used_global_memory_bases.insert(descriptor);
158
159 const Node immediate_offset =
160 Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value()));
161 const Node base_real_address =
162 Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register);
163 149
150 const u32 count = GetUniformTypeElementsCount(instr.ldg.type);
164 for (u32 i = 0; i < count; ++i) { 151 for (u32 i = 0; i < count; ++i) {
165 const Node it_offset = Immediate(i * 4); 152 const Node it_offset = Immediate(i * 4);
166 const Node real_address = 153 const Node real_address =
167 Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset); 154 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
168 const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); 155 const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
169 156
170 SetTemporal(bb, i, gmem); 157 SetTemporal(bb, i, gmem);
@@ -174,6 +161,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
174 } 161 }
175 break; 162 break;
176 } 163 }
164 case OpCode::Id::STG: {
165 const auto [real_address_base, base_address, descriptor] =
166 TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
167 static_cast<u32>(instr.stg.immediate_offset.Value()), true);
168
169 // Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
170 SetTemporal(bb, 0, real_address_base);
171
172 const u32 count = GetUniformTypeElementsCount(instr.stg.type);
173 for (u32 i = 0; i < count; ++i) {
174 SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
175 }
176 for (u32 i = 0; i < count; ++i) {
177 const Node it_offset = Immediate(i * 4);
178 const Node real_address =
179 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
180 const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
181
182 bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
183 }
184 break;
185 }
177 case OpCode::Id::ST_A: { 186 case OpCode::Id::ST_A: {
178 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, 187 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
179 "Indirect attribute loads are not supported"); 188 "Indirect attribute loads are not supported");
@@ -236,4 +245,34 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
236 return pc; 245 return pc;
237} 246}
238 247
248std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb,
249 Node addr_register,
250 u32 immediate_offset,
251 bool is_write) {
252 const Node base_address{
253 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))};
254 const auto cbuf = std::get_if<CbufNode>(base_address);
255 ASSERT(cbuf != nullptr);
256 const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
257 ASSERT(cbuf_offset_imm != nullptr);
258 const auto cbuf_offset = cbuf_offset_imm->GetValue();
259
260 bb.push_back(
261 Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
262
263 const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
264 const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
265 auto& usage = entry->second;
266 if (is_write) {
267 usage.is_written = true;
268 } else {
269 usage.is_read = true;
270 }
271
272 const auto real_address =
273 Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
274
275 return {real_address, base_address, descriptor};
276}
277
239} // namespace VideoCommon::Shader 278} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 4888998d3..1afab08c0 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -276,6 +276,11 @@ struct GlobalMemoryBase {
276 } 276 }
277}; 277};
278 278
279struct GlobalMemoryUsage {
280 bool is_read{};
281 bool is_written{};
282};
283
279struct MetaArithmetic { 284struct MetaArithmetic {
280 bool precise{}; 285 bool precise{};
281}; 286};
@@ -578,8 +583,8 @@ public:
578 return used_clip_distances; 583 return used_clip_distances;
579 } 584 }
580 585
581 const std::set<GlobalMemoryBase>& GetGlobalMemoryBases() const { 586 const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const {
582 return used_global_memory_bases; 587 return used_global_memory;
583 } 588 }
584 589
585 std::size_t GetLength() const { 590 std::size_t GetLength() const {
@@ -781,6 +786,11 @@ private:
781 786
782 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); 787 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
783 788
789 std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb,
790 Node addr_register,
791 u32 immediate_offset,
792 bool is_write);
793
784 template <typename... T> 794 template <typename... T>
785 Node Operation(OperationCode code, const T*... operands) { 795 Node Operation(OperationCode code, const T*... operands) {
786 return StoreNode(OperationNode(code, operands...)); 796 return StoreNode(OperationNode(code, operands...));
@@ -834,7 +844,7 @@ private:
834 std::map<u32, ConstBuffer> used_cbufs; 844 std::map<u32, ConstBuffer> used_cbufs;
835 std::set<Sampler> used_samplers; 845 std::set<Sampler> used_samplers;
836 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; 846 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
837 std::set<GlobalMemoryBase> used_global_memory_bases; 847 std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
838 848
839 Tegra::Shader::Header header; 849 Tegra::Shader::Header header;
840}; 850};