summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/renderer_opengl/gl_arb_decompiler.cpp | 31
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 47
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h | 9
3 files changed, 54 insertions, 33 deletions
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
index b7e9ed2e9..f4db62787 100644
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@@ -376,9 +376,11 @@ private:
376 std::string temporary = AllocTemporary(); 376 std::string temporary = AllocTemporary();
377 std::string address; 377 std::string address;
378 std::string_view opname; 378 std::string_view opname;
379 bool robust = false;
379 if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { 380 if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
380 address = GlobalMemoryPointer(*gmem); 381 address = GlobalMemoryPointer(*gmem);
381 opname = "ATOM"; 382 opname = "ATOM";
383 robust = true;
382 } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { 384 } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
383 address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); 385 address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
384 opname = "ATOMS"; 386 opname = "ATOMS";
@@ -386,7 +388,15 @@ private:
386 UNREACHABLE(); 388 UNREACHABLE();
387 return "{0, 0, 0, 0}"; 389 return "{0, 0, 0, 0}";
388 } 390 }
391 if (robust) {
392 AddLine("IF NE.x;");
393 }
389 AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); 394 AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
395 if (robust) {
396 AddLine("ELSE;");
397 AddLine("MOV.S {}, 0;", temporary);
398 AddLine("ENDIF;");
399 }
390 return temporary; 400 return temporary;
391 } 401 }
392 402
@@ -980,10 +990,9 @@ void ARBDecompiler::DeclareLocalMemory() {
980} 990}
981 991
982void ARBDecompiler::DeclareGlobalMemory() { 992void ARBDecompiler::DeclareGlobalMemory() {
983 const std::size_t num_entries = ir.GetGlobalMemory().size(); 993 const size_t num_entries = ir.GetGlobalMemory().size();
984 if (num_entries > 0) { 994 if (num_entries > 0) {
985 const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2; 995 AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1);
986 AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1);
987 } 996 }
988} 997}
989 998
@@ -1363,7 +1372,8 @@ std::string ARBDecompiler::Visit(const Node& node) {
1363 1372
1364 if (const auto gmem = std::get_if<GmemNode>(&*node)) { 1373 if (const auto gmem = std::get_if<GmemNode>(&*node)) {
1365 std::string temporary = AllocTemporary(); 1374 std::string temporary = AllocTemporary();
1366 AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem)); 1375 AddLine("MOV {}, 0;", temporary);
1376 AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem));
1367 return temporary; 1377 return temporary;
1368 } 1378 }
1369 1379
@@ -1441,18 +1451,21 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) {
1441} 1451}
1442 1452
1443std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { 1453std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
1454 // Read a bindless SSBO, return its address and set CC accordingly
1455 // address = c[binding].xy
1456 // length = c[binding].z
1444 const u32 binding = global_memory_names.at(gmem.GetDescriptor()); 1457 const u32 binding = global_memory_names.at(gmem.GetDescriptor());
1445 const char result_swizzle = binding % 2 == 0 ? 'x' : 'y';
1446 1458
1447 const std::string pointer = AllocLongVectorTemporary(); 1459 const std::string pointer = AllocLongVectorTemporary();
1448 std::string temporary = AllocTemporary(); 1460 std::string temporary = AllocTemporary();
1449 1461
1450 const u32 local_index = binding / 2; 1462 AddLine("PK64.U {}, c[{}];", pointer, binding);
1451 AddLine("PK64.U {}, c[{}];", pointer, local_index);
1452 AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), 1463 AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
1453 Visit(gmem.GetBaseAddress())); 1464 Visit(gmem.GetBaseAddress()));
1454 AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); 1465 AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
1455 AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer); 1466 AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer);
1467 // Compare offset to length and set CC
1468 AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding);
1456 return fmt::format("{}.x", pointer); 1469 return fmt::format("{}.x", pointer);
1457} 1470}
1458 1471
@@ -1552,7 +1565,9 @@ std::string ARBDecompiler::Assign(Operation operation) {
1552 ResetTemporaries(); 1565 ResetTemporaries();
1553 return {}; 1566 return {};
1554 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { 1567 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
1568 AddLine("IF NE.x;");
1555 AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); 1569 AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
1570 AddLine("ENDIF;");
1556 ResetTemporaries(); 1571 ResetTemporaries();
1557 return {}; 1572 return {};
1558 } else { 1573 } else {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index bbb2eb17c..36bf92808 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -139,16 +139,12 @@ void oglEnable(GLenum cap, bool state) {
139 (state ? glEnable : glDisable)(cap); 139 (state ? glEnable : glDisable)(cap);
140} 140}
141 141
142void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) { 142void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) {
143 if (num_entries == 0) { 143 if (num_ssbos == 0) {
144 return; 144 return;
145 } 145 }
146 if (num_entries % 2 == 1) { 146 glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos),
147 pointers[num_entries] = 0; 147 reinterpret_cast<const GLuint*>(ssbos));
148 }
149 const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2);
150 glProgramLocalParametersI4uivNV(target, 0, num_vectors,
151 reinterpret_cast<const GLuint*>(pointers));
152} 148}
153 149
154} // Anonymous namespace 150} // Anonymous namespace
@@ -900,11 +896,11 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
900} 896}
901 897
902void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { 898void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
903 static constexpr std::array PARAMETER_LUT = { 899 static constexpr std::array PARAMETER_LUT{
904 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, 900 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
905 GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, 901 GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
906 GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV}; 902 GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
907 903 };
908 MICROPROFILE_SCOPE(OpenGL_UBO); 904 MICROPROFILE_SCOPE(OpenGL_UBO);
909 const auto& stages = maxwell3d.state.shader_stages; 905 const auto& stages = maxwell3d.state.shader_stages;
910 const auto& shader_stage = stages[stage_index]; 906 const auto& shader_stage = stages[stage_index];
@@ -1007,8 +1003,8 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
1007 const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; 1003 const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
1008 const auto& entries{shader->GetEntries().global_memory_entries}; 1004 const auto& entries{shader->GetEntries().global_memory_entries};
1009 1005
1010 std::array<GLuint64EXT, 32> pointers; 1006 std::array<BindlessSSBO, 32> ssbos;
1011 ASSERT(entries.size() < pointers.size()); 1007 ASSERT(entries.size() < ssbos.size());
1012 1008
1013 const bool assembly_shaders = device.UseAssemblyShaders(); 1009 const bool assembly_shaders = device.UseAssemblyShaders();
1014 u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; 1010 u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
@@ -1016,11 +1012,11 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
1016 const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; 1012 const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
1017 const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; 1013 const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
1018 const u32 size{gpu_memory.Read<u32>(addr + 8)}; 1014 const u32 size{gpu_memory.Read<u32>(addr + 8)};
1019 SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); 1015 SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
1020 ++binding; 1016 ++binding;
1021 } 1017 }
1022 if (assembly_shaders) { 1018 if (assembly_shaders) {
1023 UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size()); 1019 UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size());
1024 } 1020 }
1025} 1021}
1026 1022
@@ -1028,29 +1024,32 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
1028 const auto& cbufs{kepler_compute.launch_description.const_buffer_config}; 1024 const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
1029 const auto& entries{kernel->GetEntries().global_memory_entries}; 1025 const auto& entries{kernel->GetEntries().global_memory_entries};
1030 1026
1031 std::array<GLuint64EXT, 32> pointers; 1027 std::array<BindlessSSBO, 32> ssbos;
1032 ASSERT(entries.size() < pointers.size()); 1028 ASSERT(entries.size() < ssbos.size());
1033 1029
1034 u32 binding = 0; 1030 u32 binding = 0;
1035 for (const auto& entry : entries) { 1031 for (const auto& entry : entries) {
1036 const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; 1032 const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
1037 const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; 1033 const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
1038 const u32 size{gpu_memory.Read<u32>(addr + 8)}; 1034 const u32 size{gpu_memory.Read<u32>(addr + 8)};
1039 SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); 1035 SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
1040 ++binding; 1036 ++binding;
1041 } 1037 }
1042 if (device.UseAssemblyShaders()) { 1038 if (device.UseAssemblyShaders()) {
1043 UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size()); 1039 UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size());
1044 } 1040 }
1045} 1041}
1046 1042
1047void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, 1043void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
1048 GPUVAddr gpu_addr, std::size_t size, 1044 GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) {
1049 GLuint64EXT* pointer) { 1045 const size_t alignment{device.GetShaderStorageBufferAlignment()};
1050 const std::size_t alignment{device.GetShaderStorageBufferAlignment()};
1051 const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); 1046 const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
1052 if (device.UseAssemblyShaders()) { 1047 if (device.UseAssemblyShaders()) {
1053 *pointer = info.address + info.offset; 1048 *ssbo = BindlessSSBO{
1049 .address = static_cast<GLuint64EXT>(info.address + info.offset),
1050 .length = static_cast<GLsizei>(size),
1051 .padding = 0,
1052 };
1054 } else { 1053 } else {
1055 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, 1054 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
1056 static_cast<GLsizeiptr>(size)); 1055 static_cast<GLsizeiptr>(size));
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index f451404b2..1d0f585fa 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -53,6 +53,13 @@ namespace OpenGL {
53struct ScreenInfo; 53struct ScreenInfo;
54struct DrawParameters; 54struct DrawParameters;
55 55
56struct BindlessSSBO {
57 GLuint64EXT address;
58 GLsizei length;
59 GLsizei padding;
60};
61static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128);
62
56class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { 63class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
57public: 64public:
58 explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, 65 explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
@@ -126,7 +133,7 @@ private:
126 133
127 /// Configures a global memory buffer. 134 /// Configures a global memory buffer.
128 void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, 135 void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
129 std::size_t size, GLuint64EXT* pointer); 136 size_t size, BindlessSSBO* ssbo);
130 137
131 /// Configures the current textures to use for the draw command. 138 /// Configures the current textures to use for the draw command.
132 void SetupDrawTextures(std::size_t stage_index, Shader* shader); 139 void SetupDrawTextures(std::size_t stage_index, Shader* shader);