summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.cpp84
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp103
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp71
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h17
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp2
7 files changed, 173 insertions, 110 deletions
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
index eb5158407..4489abf61 100644
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@@ -185,10 +185,6 @@ std::string TextureType(const MetaTexture& meta) {
185 return type; 185 return type;
186} 186}
187 187
188std::string GlobalMemoryName(const GlobalMemoryBase& base) {
189 return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset);
190}
191
192class ARBDecompiler final { 188class ARBDecompiler final {
193public: 189public:
194 explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, 190 explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
@@ -199,6 +195,8 @@ public:
199 } 195 }
200 196
201private: 197private:
198 void DefineGlobalMemory();
199
202 void DeclareHeader(); 200 void DeclareHeader();
203 void DeclareVertex(); 201 void DeclareVertex();
204 void DeclareGeometry(); 202 void DeclareGeometry();
@@ -228,6 +226,7 @@ private:
228 226
229 std::pair<std::string, std::size_t> BuildCoords(Operation); 227 std::pair<std::string, std::size_t> BuildCoords(Operation);
230 std::string BuildAoffi(Operation); 228 std::string BuildAoffi(Operation);
229 std::string GlobalMemoryPointer(const GmemNode& gmem);
231 void Exit(); 230 void Exit();
232 231
233 std::string Assign(Operation); 232 std::string Assign(Operation);
@@ -378,10 +377,8 @@ private:
378 std::string address; 377 std::string address;
379 std::string_view opname; 378 std::string_view opname;
380 if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { 379 if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
381 AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), 380 address = GlobalMemoryPointer(*gmem);
382 Visit(gmem->GetBaseAddress())); 381 opname = "ATOM";
383 address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary);
384 opname = "ATOMB";
385 } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { 382 } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
386 address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); 383 address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
387 opname = "ATOMS"; 384 opname = "ATOMS";
@@ -456,9 +453,13 @@ private:
456 shader_source += '\n'; 453 shader_source += '\n';
457 } 454 }
458 455
459 std::string AllocTemporary() { 456 std::string AllocLongVectorTemporary() {
460 max_temporaries = std::max(max_temporaries, num_temporaries + 1); 457 max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1);
461 return fmt::format("T{}.x", num_temporaries++); 458 return fmt::format("L{}", num_long_temporaries++);
459 }
460
461 std::string AllocLongTemporary() {
462 return fmt::format("{}.x", AllocLongVectorTemporary());
462 } 463 }
463 464
464 std::string AllocVectorTemporary() { 465 std::string AllocVectorTemporary() {
@@ -466,8 +467,13 @@ private:
466 return fmt::format("T{}", num_temporaries++); 467 return fmt::format("T{}", num_temporaries++);
467 } 468 }
468 469
470 std::string AllocTemporary() {
471 return fmt::format("{}.x", AllocVectorTemporary());
472 }
473
469 void ResetTemporaries() noexcept { 474 void ResetTemporaries() noexcept {
470 num_temporaries = 0; 475 num_temporaries = 0;
476 num_long_temporaries = 0;
471 } 477 }
472 478
473 const Device& device; 479 const Device& device;
@@ -478,6 +484,11 @@ private:
478 std::size_t num_temporaries = 0; 484 std::size_t num_temporaries = 0;
479 std::size_t max_temporaries = 0; 485 std::size_t max_temporaries = 0;
480 486
487 std::size_t num_long_temporaries = 0;
488 std::size_t max_long_temporaries = 0;
489
490 std::map<GlobalMemoryBase, u32> global_memory_names;
491
481 std::string shader_source; 492 std::string shader_source;
482 493
483 static constexpr std::string_view ADD_F32 = "ADD.F32"; 494 static constexpr std::string_view ADD_F32 = "ADD.F32";
@@ -784,6 +795,8 @@ private:
784ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, 795ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
785 ShaderType stage, std::string_view identifier) 796 ShaderType stage, std::string_view identifier)
786 : device{device}, ir{ir}, registry{registry}, stage{stage} { 797 : device{device}, ir{ir}, registry{registry}, stage{stage} {
798 DefineGlobalMemory();
799
787 AddLine("TEMP RC;"); 800 AddLine("TEMP RC;");
788 AddLine("TEMP FSWZA[4];"); 801 AddLine("TEMP FSWZA[4];");
789 AddLine("TEMP FSWZB[4];"); 802 AddLine("TEMP FSWZB[4];");
@@ -829,12 +842,20 @@ std::string_view HeaderStageName(ShaderType stage) {
829 } 842 }
830} 843}
831 844
845void ARBDecompiler::DefineGlobalMemory() {
846 u32 binding = 0;
847 for (const auto& pair : ir.GetGlobalMemory()) {
848 const GlobalMemoryBase base = pair.first;
849 global_memory_names.emplace(base, binding);
850 ++binding;
851 }
852}
853
832void ARBDecompiler::DeclareHeader() { 854void ARBDecompiler::DeclareHeader() {
833 AddLine("!!NV{}5.0", HeaderStageName(stage)); 855 AddLine("!!NV{}5.0", HeaderStageName(stage));
834 // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D 856 // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
835 AddLine("OPTION NV_internal;"); 857 AddLine("OPTION NV_internal;");
836 AddLine("OPTION NV_gpu_program_fp64;"); 858 AddLine("OPTION NV_gpu_program_fp64;");
837 AddLine("OPTION NV_shader_storage_buffer;");
838 AddLine("OPTION NV_shader_thread_group;"); 859 AddLine("OPTION NV_shader_thread_group;");
839 if (ir.UsesWarps() && device.HasWarpIntrinsics()) { 860 if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
840 AddLine("OPTION NV_shader_thread_shuffle;"); 861 AddLine("OPTION NV_shader_thread_shuffle;");
@@ -951,11 +972,10 @@ void ARBDecompiler::DeclareLocalMemory() {
951} 972}
952 973
953void ARBDecompiler::DeclareGlobalMemory() { 974void ARBDecompiler::DeclareGlobalMemory() {
954 u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer; 975 const std::size_t num_entries = ir.GetGlobalMemory().size();
955 for (const auto& pair : ir.GetGlobalMemory()) { 976 if (num_entries > 0) {
956 const auto& base = pair.first; 977 const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2;
957 AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding); 978 AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1);
958 ++binding;
959 } 979 }
960} 980}
961 981
@@ -977,6 +997,9 @@ void ARBDecompiler::DeclareTemporaries() {
977 for (std::size_t i = 0; i < max_temporaries; ++i) { 997 for (std::size_t i = 0; i < max_temporaries; ++i) {
978 AddLine("TEMP T{};", i); 998 AddLine("TEMP T{};", i);
979 } 999 }
1000 for (std::size_t i = 0; i < max_long_temporaries; ++i) {
1001 AddLine("LONG TEMP L{};", i);
1002 }
980} 1003}
981 1004
982void ARBDecompiler::DeclarePredicates() { 1005void ARBDecompiler::DeclarePredicates() {
@@ -1339,10 +1362,7 @@ std::string ARBDecompiler::Visit(const Node& node) {
1339 1362
1340 if (const auto gmem = std::get_if<GmemNode>(&*node)) { 1363 if (const auto gmem = std::get_if<GmemNode>(&*node)) {
1341 std::string temporary = AllocTemporary(); 1364 std::string temporary = AllocTemporary();
1342 AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), 1365 AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem));
1343 Visit(gmem->GetBaseAddress()));
1344 AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()),
1345 temporary);
1346 return temporary; 1366 return temporary;
1347 } 1367 }
1348 1368
@@ -1419,6 +1439,22 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) {
1419 return fmt::format(", offset({})", temporary); 1439 return fmt::format(", offset({})", temporary);
1420} 1440}
1421 1441
1442std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
1443 const u32 binding = global_memory_names.at(gmem.GetDescriptor());
1444 const char result_swizzle = binding % 2 == 0 ? 'x' : 'y';
1445
1446 const std::string pointer = AllocLongVectorTemporary();
1447 std::string temporary = AllocTemporary();
1448
1449 const u32 local_index = binding / 2;
1450 AddLine("PK64.U {}, c[{}];", pointer, local_index);
1451 AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
1452 Visit(gmem.GetBaseAddress()));
1453 AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
1454 AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer);
1455 return fmt::format("{}.x", pointer);
1456}
1457
1422void ARBDecompiler::Exit() { 1458void ARBDecompiler::Exit() {
1423 if (stage != ShaderType::Fragment) { 1459 if (stage != ShaderType::Fragment) {
1424 AddLine("RET;"); 1460 AddLine("RET;");
@@ -1515,11 +1551,7 @@ std::string ARBDecompiler::Assign(Operation operation) {
1515 ResetTemporaries(); 1551 ResetTemporaries();
1516 return {}; 1552 return {};
1517 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { 1553 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
1518 const std::string temporary = AllocTemporary(); 1554 AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
1519 AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
1520 Visit(gmem->GetBaseAddress()));
1521 AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()),
1522 temporary);
1523 ResetTemporaries(); 1555 ResetTemporaries();
1524 return {}; 1556 return {};
1525 } else { 1557 } else {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index e461e4c70..e866d8f2f 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -26,7 +26,7 @@ Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)
26 : VideoCommon::BufferBlock{cpu_addr, size} { 26 : VideoCommon::BufferBlock{cpu_addr, size} {
27 gl_buffer.Create(); 27 gl_buffer.Create();
28 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); 28 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
29 if (device.HasVertexBufferUnifiedMemory()) { 29 if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
30 glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); 30 glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
31 glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); 31 glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
32 } 32 }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index c3fad563c..03e82c599 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -139,6 +139,18 @@ void oglEnable(GLenum cap, bool state) {
139 (state ? glEnable : glDisable)(cap); 139 (state ? glEnable : glDisable)(cap);
140} 140}
141 141
142void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) {
143 if (num_entries == 0) {
144 return;
145 }
146 if (num_entries % 2 == 1) {
147 pointers[num_entries] = 0;
148 }
149 const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2);
150 glProgramLocalParametersI4uivNV(target, 0, num_vectors,
151 reinterpret_cast<const GLuint*>(pointers));
152}
153
142} // Anonymous namespace 154} // Anonymous namespace
143 155
144RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, 156RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
@@ -324,7 +336,6 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
324void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { 336void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
325 MICROPROFILE_SCOPE(OpenGL_Shader); 337 MICROPROFILE_SCOPE(OpenGL_Shader);
326 auto& gpu = system.GPU().Maxwell3D(); 338 auto& gpu = system.GPU().Maxwell3D();
327 std::size_t num_ssbos = 0;
328 u32 clip_distances = 0; 339 u32 clip_distances = 0;
329 340
330 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 341 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
@@ -347,29 +358,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
347 } 358 }
348 359
349 // Currently this stages are not supported in the OpenGL backend. 360 // Currently this stages are not supported in the OpenGL backend.
350 // Todo(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL 361 // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
351 if (program == Maxwell::ShaderProgram::TesselationControl) { 362 if (program == Maxwell::ShaderProgram::TesselationControl ||
363 program == Maxwell::ShaderProgram::TesselationEval) {
352 continue; 364 continue;
353 } else if (program == Maxwell::ShaderProgram::TesselationEval) {
354 continue;
355 }
356
357 Shader* shader = shader_cache.GetStageProgram(program, async_shaders);
358
359 if (device.UseAssemblyShaders()) {
360 // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
361 // all stages share the same bindings.
362 const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size();
363 ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage");
364 num_ssbos += num_stage_ssbos;
365 } 365 }
366 366
367 // Stage indices are 0 - 5 367 Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
368 const std::size_t stage = index == 0 ? 0 : index - 1;
369 SetupDrawConstBuffers(stage, shader);
370 SetupDrawGlobalMemory(stage, shader);
371 SetupDrawTextures(stage, shader);
372 SetupDrawImages(stage, shader);
373 368
374 const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; 369 const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
375 switch (program) { 370 switch (program) {
@@ -388,6 +383,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
388 shader_config.enable.Value(), shader_config.offset); 383 shader_config.enable.Value(), shader_config.offset);
389 } 384 }
390 385
386 // Stage indices are 0 - 5
387 const std::size_t stage = index == 0 ? 0 : index - 1;
388 SetupDrawConstBuffers(stage, shader);
389 SetupDrawGlobalMemory(stage, shader);
390 SetupDrawTextures(stage, shader);
391 SetupDrawImages(stage, shader);
392
391 // Workaround for Intel drivers. 393 // Workaround for Intel drivers.
392 // When a clip distance is enabled but not set in the shader it crops parts of the screen 394 // When a clip distance is enabled but not set in the shader it crops parts of the screen
393 // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the 395 // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
@@ -749,6 +751,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
749 current_cbuf = 0; 751 current_cbuf = 0;
750 752
751 auto kernel = shader_cache.GetComputeKernel(code_addr); 753 auto kernel = shader_cache.GetComputeKernel(code_addr);
754 program_manager.BindCompute(kernel->GetHandle());
755
752 SetupComputeTextures(kernel); 756 SetupComputeTextures(kernel);
753 SetupComputeImages(kernel); 757 SetupComputeImages(kernel);
754 758
@@ -763,7 +767,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
763 buffer_cache.Unmap(); 767 buffer_cache.Unmap();
764 768
765 const auto& launch_desc = system.GPU().KeplerCompute().launch_description; 769 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
766 program_manager.BindCompute(kernel->GetHandle());
767 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); 770 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
768 ++num_queued_commands; 771 ++num_queued_commands;
769} 772}
@@ -1023,40 +1026,66 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
1023} 1026}
1024 1027
1025void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) { 1028void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
1029 static constexpr std::array TARGET_LUT = {
1030 GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
1031 GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
1032 };
1033
1026 auto& gpu{system.GPU()}; 1034 auto& gpu{system.GPU()};
1027 auto& memory_manager{gpu.MemoryManager()}; 1035 auto& memory_manager{gpu.MemoryManager()};
1028 const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; 1036 const auto& cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
1037 const auto& entries{shader->GetEntries().global_memory_entries};
1038
1039 std::array<GLuint64EXT, 32> pointers;
1040 ASSERT(entries.size() < pointers.size());
1029 1041
1030 u32 binding = 1042 const bool assembly_shaders = device.UseAssemblyShaders();
1031 device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; 1043 u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
1032 for (const auto& entry : shader->GetEntries().global_memory_entries) { 1044 for (const auto& entry : entries) {
1033 const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; 1045 const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
1034 const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; 1046 const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};
1035 const u32 size{memory_manager.Read<u32>(addr + 8)}; 1047 const u32 size{memory_manager.Read<u32>(addr + 8)};
1036 SetupGlobalMemory(binding++, entry, gpu_addr, size); 1048 SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
1049 ++binding;
1050 }
1051 if (assembly_shaders) {
1052 UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size());
1037 } 1053 }
1038} 1054}
1039 1055
1040void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { 1056void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
1041 auto& gpu{system.GPU()}; 1057 auto& gpu{system.GPU()};
1042 auto& memory_manager{gpu.MemoryManager()}; 1058 auto& memory_manager{gpu.MemoryManager()};
1043 const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; 1059 const auto& cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
1060 const auto& entries{kernel->GetEntries().global_memory_entries};
1061
1062 std::array<GLuint64EXT, 32> pointers;
1063 ASSERT(entries.size() < pointers.size());
1044 1064
1045 u32 binding = 0; 1065 u32 binding = 0;
1046 for (const auto& entry : kernel->GetEntries().global_memory_entries) { 1066 for (const auto& entry : entries) {
1047 const auto addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; 1067 const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
1048 const auto gpu_addr{memory_manager.Read<u64>(addr)}; 1068 const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};
1049 const auto size{memory_manager.Read<u32>(addr + 8)}; 1069 const u32 size{memory_manager.Read<u32>(addr + 8)};
1050 SetupGlobalMemory(binding++, entry, gpu_addr, size); 1070 SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
1071 ++binding;
1072 }
1073 if (device.UseAssemblyShaders()) {
1074 UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size());
1051 } 1075 }
1052} 1076}
1053 1077
1054void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, 1078void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
1055 GPUVAddr gpu_addr, std::size_t size) { 1079 GPUVAddr gpu_addr, std::size_t size,
1056 const auto alignment{device.GetShaderStorageBufferAlignment()}; 1080 GLuint64EXT* pointer) {
1081 const std::size_t alignment{device.GetShaderStorageBufferAlignment()};
1057 const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); 1082 const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
1058 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, 1083 if (device.UseAssemblyShaders()) {
1059 static_cast<GLsizeiptr>(size)); 1084 *pointer = info.address + info.offset;
1085 } else {
1086 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
1087 static_cast<GLsizeiptr>(size));
1088 }
1060} 1089}
1061 1090
1062void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { 1091void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index a95646936..ccc6f50f6 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -124,9 +124,9 @@ private:
124 /// Configures the current global memory entries to use for the kernel invocation. 124 /// Configures the current global memory entries to use for the kernel invocation.
125 void SetupComputeGlobalMemory(Shader* kernel); 125 void SetupComputeGlobalMemory(Shader* kernel);
126 126
127 /// Configures a constant buffer. 127 /// Configures a global memory buffer.
128 void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, 128 void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
129 std::size_t size); 129 std::size_t size, GLuint64EXT* pointer);
130 130
131 /// Configures the current textures to use for the draw command. 131 /// Configures the current textures to use for the draw command.
132 void SetupDrawTextures(std::size_t stage_index, Shader* shader); 132 void SetupDrawTextures(std::size_t stage_index, Shader* shader);
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 8e754fa90..691c6c79b 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -11,8 +11,30 @@
11 11
12namespace OpenGL { 12namespace OpenGL {
13 13
14ProgramManager::ProgramManager(const Device& device) { 14namespace {
15 use_assembly_programs = device.UseAssemblyShaders(); 15
16void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) {
17 if (current == old) {
18 return;
19 }
20 if (current == 0) {
21 if (enabled) {
22 enabled = false;
23 glDisable(stage);
24 }
25 return;
26 }
27 if (!enabled) {
28 enabled = true;
29 glEnable(stage);
30 }
31 glBindProgramARB(stage, current);
32}
33
34} // Anonymous namespace
35
36ProgramManager::ProgramManager(const Device& device)
37 : use_assembly_programs{device.UseAssemblyShaders()} {
16 if (use_assembly_programs) { 38 if (use_assembly_programs) {
17 glEnable(GL_COMPUTE_PROGRAM_NV); 39 glEnable(GL_COMPUTE_PROGRAM_NV);
18 } else { 40 } else {
@@ -33,9 +55,7 @@ void ProgramManager::BindCompute(GLuint program) {
33} 55}
34 56
35void ProgramManager::BindGraphicsPipeline() { 57void ProgramManager::BindGraphicsPipeline() {
36 if (use_assembly_programs) { 58 if (!use_assembly_programs) {
37 UpdateAssemblyPrograms();
38 } else {
39 UpdateSourcePrograms(); 59 UpdateSourcePrograms();
40 } 60 }
41} 61}
@@ -63,32 +83,25 @@ void ProgramManager::RestoreGuestPipeline() {
63 } 83 }
64} 84}
65 85
66void ProgramManager::UpdateAssemblyPrograms() { 86void ProgramManager::UseVertexShader(GLuint program) {
67 const auto update_state = [](GLenum stage, bool& enabled, GLuint current, GLuint old) { 87 if (use_assembly_programs) {
68 if (current == old) { 88 BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
69 return; 89 }
70 } 90 current_state.vertex = program;
71 if (current == 0) { 91}
72 if (enabled) {
73 enabled = false;
74 glDisable(stage);
75 }
76 return;
77 }
78 if (!enabled) {
79 enabled = true;
80 glEnable(stage);
81 }
82 glBindProgramARB(stage, current);
83 };
84 92
85 update_state(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex); 93void ProgramManager::UseGeometryShader(GLuint program) {
86 update_state(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry, 94 if (use_assembly_programs) {
87 old_state.geometry); 95 BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled);
88 update_state(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment, 96 }
89 old_state.fragment); 97 current_state.geometry = program;
98}
90 99
91 old_state = current_state; 100void ProgramManager::UseFragmentShader(GLuint program) {
101 if (use_assembly_programs) {
102 BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled);
103 }
104 current_state.fragment = program;
92} 105}
93 106
94void ProgramManager::UpdateSourcePrograms() { 107void ProgramManager::UpdateSourcePrograms() {
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 0f03b4f12..950e0dfcb 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -45,17 +45,9 @@ public:
45 /// Rewinds BindHostPipeline state changes. 45 /// Rewinds BindHostPipeline state changes.
46 void RestoreGuestPipeline(); 46 void RestoreGuestPipeline();
47 47
48 void UseVertexShader(GLuint program) { 48 void UseVertexShader(GLuint program);
49 current_state.vertex = program; 49 void UseGeometryShader(GLuint program);
50 } 50 void UseFragmentShader(GLuint program);
51
52 void UseGeometryShader(GLuint program) {
53 current_state.geometry = program;
54 }
55
56 void UseFragmentShader(GLuint program) {
57 current_state.fragment = program;
58 }
59 51
60private: 52private:
61 struct PipelineState { 53 struct PipelineState {
@@ -64,9 +56,6 @@ private:
64 GLuint fragment = 0; 56 GLuint fragment = 0;
65 }; 57 };
66 58
67 /// Update NV_gpu_program5 programs.
68 void UpdateAssemblyPrograms();
69
70 /// Update GLSL programs. 59 /// Update GLSL programs.
71 void UpdateSourcePrograms(); 60 void UpdateSourcePrograms();
72 61
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index 3655ff629..887995cf4 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -35,7 +35,7 @@ OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool ver
35 mapped_ptr = static_cast<u8*>( 35 mapped_ptr = static_cast<u8*>(
36 glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); 36 glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
37 37
38 if (device.HasVertexBufferUnifiedMemory()) { 38 if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
39 glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); 39 glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
40 glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); 40 glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
41 } 41 }