diff options
| author | 2020-07-20 21:54:17 -0400 | |
|---|---|---|
| committer | 2020-07-20 21:54:17 -0400 | |
| commit | 61e4c0f83d3e355bc717851de4df0e001645ab8f (patch) | |
| tree | 3c68791f86596e93fa6adff17538827166892833 /src | |
| parent | Merge pull request #4376 from ogniK5377/dark-wait-tree (diff) | |
| parent | gl_arb_decompiler: Use NV_shader_buffer_{load,store} on assembly shaders (diff) | |
| download | yuzu-61e4c0f83d3e355bc717851de4df0e001645ab8f.tar.gz yuzu-61e4c0f83d3e355bc717851de4df0e001645ab8f.tar.xz yuzu-61e4c0f83d3e355bc717851de4df0e001645ab8f.zip | |
Merge pull request #4168 from ReinUsesLisp/global-memory
gl_arb_decompiler: Use NV_shader_buffer_{load,store} on assembly shaders
Diffstat (limited to '')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_arb_decompiler.cpp | 84 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 103 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_manager.cpp | 71 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_manager.h | 17 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_stream_buffer.cpp | 2 |
7 files changed, 173 insertions, 110 deletions
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp index eb5158407..4489abf61 100644 --- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp | |||
| @@ -185,10 +185,6 @@ std::string TextureType(const MetaTexture& meta) { | |||
| 185 | return type; | 185 | return type; |
| 186 | } | 186 | } |
| 187 | 187 | ||
| 188 | std::string GlobalMemoryName(const GlobalMemoryBase& base) { | ||
| 189 | return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset); | ||
| 190 | } | ||
| 191 | |||
| 192 | class ARBDecompiler final { | 188 | class ARBDecompiler final { |
| 193 | public: | 189 | public: |
| 194 | explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, | 190 | explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, |
| @@ -199,6 +195,8 @@ public: | |||
| 199 | } | 195 | } |
| 200 | 196 | ||
| 201 | private: | 197 | private: |
| 198 | void DefineGlobalMemory(); | ||
| 199 | |||
| 202 | void DeclareHeader(); | 200 | void DeclareHeader(); |
| 203 | void DeclareVertex(); | 201 | void DeclareVertex(); |
| 204 | void DeclareGeometry(); | 202 | void DeclareGeometry(); |
| @@ -228,6 +226,7 @@ private: | |||
| 228 | 226 | ||
| 229 | std::pair<std::string, std::size_t> BuildCoords(Operation); | 227 | std::pair<std::string, std::size_t> BuildCoords(Operation); |
| 230 | std::string BuildAoffi(Operation); | 228 | std::string BuildAoffi(Operation); |
| 229 | std::string GlobalMemoryPointer(const GmemNode& gmem); | ||
| 231 | void Exit(); | 230 | void Exit(); |
| 232 | 231 | ||
| 233 | std::string Assign(Operation); | 232 | std::string Assign(Operation); |
| @@ -378,10 +377,8 @@ private: | |||
| 378 | std::string address; | 377 | std::string address; |
| 379 | std::string_view opname; | 378 | std::string_view opname; |
| 380 | if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { | 379 | if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { |
| 381 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), | 380 | address = GlobalMemoryPointer(*gmem); |
| 382 | Visit(gmem->GetBaseAddress())); | 381 | opname = "ATOM"; |
| 383 | address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary); | ||
| 384 | opname = "ATOMB"; | ||
| 385 | } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { | 382 | } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { |
| 386 | address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); | 383 | address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); |
| 387 | opname = "ATOMS"; | 384 | opname = "ATOMS"; |
| @@ -456,9 +453,13 @@ private: | |||
| 456 | shader_source += '\n'; | 453 | shader_source += '\n'; |
| 457 | } | 454 | } |
| 458 | 455 | ||
| 459 | std::string AllocTemporary() { | 456 | std::string AllocLongVectorTemporary() { |
| 460 | max_temporaries = std::max(max_temporaries, num_temporaries + 1); | 457 | max_long_temporaries = std::max(max_long_temporaries, num_long_temporaries + 1); |
| 461 | return fmt::format("T{}.x", num_temporaries++); | 458 | return fmt::format("L{}", num_long_temporaries++); |
| 459 | } | ||
| 460 | |||
| 461 | std::string AllocLongTemporary() { | ||
| 462 | return fmt::format("{}.x", AllocLongVectorTemporary()); | ||
| 462 | } | 463 | } |
| 463 | 464 | ||
| 464 | std::string AllocVectorTemporary() { | 465 | std::string AllocVectorTemporary() { |
| @@ -466,8 +467,13 @@ private: | |||
| 466 | return fmt::format("T{}", num_temporaries++); | 467 | return fmt::format("T{}", num_temporaries++); |
| 467 | } | 468 | } |
| 468 | 469 | ||
| 470 | std::string AllocTemporary() { | ||
| 471 | return fmt::format("{}.x", AllocVectorTemporary()); | ||
| 472 | } | ||
| 473 | |||
| 469 | void ResetTemporaries() noexcept { | 474 | void ResetTemporaries() noexcept { |
| 470 | num_temporaries = 0; | 475 | num_temporaries = 0; |
| 476 | num_long_temporaries = 0; | ||
| 471 | } | 477 | } |
| 472 | 478 | ||
| 473 | const Device& device; | 479 | const Device& device; |
| @@ -478,6 +484,11 @@ private: | |||
| 478 | std::size_t num_temporaries = 0; | 484 | std::size_t num_temporaries = 0; |
| 479 | std::size_t max_temporaries = 0; | 485 | std::size_t max_temporaries = 0; |
| 480 | 486 | ||
| 487 | std::size_t num_long_temporaries = 0; | ||
| 488 | std::size_t max_long_temporaries = 0; | ||
| 489 | |||
| 490 | std::map<GlobalMemoryBase, u32> global_memory_names; | ||
| 491 | |||
| 481 | std::string shader_source; | 492 | std::string shader_source; |
| 482 | 493 | ||
| 483 | static constexpr std::string_view ADD_F32 = "ADD.F32"; | 494 | static constexpr std::string_view ADD_F32 = "ADD.F32"; |
| @@ -784,6 +795,8 @@ private: | |||
| 784 | ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, | 795 | ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, |
| 785 | ShaderType stage, std::string_view identifier) | 796 | ShaderType stage, std::string_view identifier) |
| 786 | : device{device}, ir{ir}, registry{registry}, stage{stage} { | 797 | : device{device}, ir{ir}, registry{registry}, stage{stage} { |
| 798 | DefineGlobalMemory(); | ||
| 799 | |||
| 787 | AddLine("TEMP RC;"); | 800 | AddLine("TEMP RC;"); |
| 788 | AddLine("TEMP FSWZA[4];"); | 801 | AddLine("TEMP FSWZA[4];"); |
| 789 | AddLine("TEMP FSWZB[4];"); | 802 | AddLine("TEMP FSWZB[4];"); |
| @@ -829,12 +842,20 @@ std::string_view HeaderStageName(ShaderType stage) { | |||
| 829 | } | 842 | } |
| 830 | } | 843 | } |
| 831 | 844 | ||
| 845 | void ARBDecompiler::DefineGlobalMemory() { | ||
| 846 | u32 binding = 0; | ||
| 847 | for (const auto& pair : ir.GetGlobalMemory()) { | ||
| 848 | const GlobalMemoryBase base = pair.first; | ||
| 849 | global_memory_names.emplace(base, binding); | ||
| 850 | ++binding; | ||
| 851 | } | ||
| 852 | } | ||
| 853 | |||
| 832 | void ARBDecompiler::DeclareHeader() { | 854 | void ARBDecompiler::DeclareHeader() { |
| 833 | AddLine("!!NV{}5.0", HeaderStageName(stage)); | 855 | AddLine("!!NV{}5.0", HeaderStageName(stage)); |
| 834 | // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D | 856 | // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D |
| 835 | AddLine("OPTION NV_internal;"); | 857 | AddLine("OPTION NV_internal;"); |
| 836 | AddLine("OPTION NV_gpu_program_fp64;"); | 858 | AddLine("OPTION NV_gpu_program_fp64;"); |
| 837 | AddLine("OPTION NV_shader_storage_buffer;"); | ||
| 838 | AddLine("OPTION NV_shader_thread_group;"); | 859 | AddLine("OPTION NV_shader_thread_group;"); |
| 839 | if (ir.UsesWarps() && device.HasWarpIntrinsics()) { | 860 | if (ir.UsesWarps() && device.HasWarpIntrinsics()) { |
| 840 | AddLine("OPTION NV_shader_thread_shuffle;"); | 861 | AddLine("OPTION NV_shader_thread_shuffle;"); |
| @@ -951,11 +972,10 @@ void ARBDecompiler::DeclareLocalMemory() { | |||
| 951 | } | 972 | } |
| 952 | 973 | ||
| 953 | void ARBDecompiler::DeclareGlobalMemory() { | 974 | void ARBDecompiler::DeclareGlobalMemory() { |
| 954 | u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer; | 975 | const std::size_t num_entries = ir.GetGlobalMemory().size(); |
| 955 | for (const auto& pair : ir.GetGlobalMemory()) { | 976 | if (num_entries > 0) { |
| 956 | const auto& base = pair.first; | 977 | const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2; |
| 957 | AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding); | 978 | AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1); |
| 958 | ++binding; | ||
| 959 | } | 979 | } |
| 960 | } | 980 | } |
| 961 | 981 | ||
| @@ -977,6 +997,9 @@ void ARBDecompiler::DeclareTemporaries() { | |||
| 977 | for (std::size_t i = 0; i < max_temporaries; ++i) { | 997 | for (std::size_t i = 0; i < max_temporaries; ++i) { |
| 978 | AddLine("TEMP T{};", i); | 998 | AddLine("TEMP T{};", i); |
| 979 | } | 999 | } |
| 1000 | for (std::size_t i = 0; i < max_long_temporaries; ++i) { | ||
| 1001 | AddLine("LONG TEMP L{};", i); | ||
| 1002 | } | ||
| 980 | } | 1003 | } |
| 981 | 1004 | ||
| 982 | void ARBDecompiler::DeclarePredicates() { | 1005 | void ARBDecompiler::DeclarePredicates() { |
| @@ -1339,10 +1362,7 @@ std::string ARBDecompiler::Visit(const Node& node) { | |||
| 1339 | 1362 | ||
| 1340 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { | 1363 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { |
| 1341 | std::string temporary = AllocTemporary(); | 1364 | std::string temporary = AllocTemporary(); |
| 1342 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), | 1365 | AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem)); |
| 1343 | Visit(gmem->GetBaseAddress())); | ||
| 1344 | AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()), | ||
| 1345 | temporary); | ||
| 1346 | return temporary; | 1366 | return temporary; |
| 1347 | } | 1367 | } |
| 1348 | 1368 | ||
| @@ -1419,6 +1439,22 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) { | |||
| 1419 | return fmt::format(", offset({})", temporary); | 1439 | return fmt::format(", offset({})", temporary); |
| 1420 | } | 1440 | } |
| 1421 | 1441 | ||
| 1442 | std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { | ||
| 1443 | const u32 binding = global_memory_names.at(gmem.GetDescriptor()); | ||
| 1444 | const char result_swizzle = binding % 2 == 0 ? 'x' : 'y'; | ||
| 1445 | |||
| 1446 | const std::string pointer = AllocLongVectorTemporary(); | ||
| 1447 | std::string temporary = AllocTemporary(); | ||
| 1448 | |||
| 1449 | const u32 local_index = binding / 2; | ||
| 1450 | AddLine("PK64.U {}, c[{}];", pointer, local_index); | ||
| 1451 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), | ||
| 1452 | Visit(gmem.GetBaseAddress())); | ||
| 1453 | AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); | ||
| 1454 | AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer); | ||
| 1455 | return fmt::format("{}.x", pointer); | ||
| 1456 | } | ||
| 1457 | |||
| 1422 | void ARBDecompiler::Exit() { | 1458 | void ARBDecompiler::Exit() { |
| 1423 | if (stage != ShaderType::Fragment) { | 1459 | if (stage != ShaderType::Fragment) { |
| 1424 | AddLine("RET;"); | 1460 | AddLine("RET;"); |
| @@ -1515,11 +1551,7 @@ std::string ARBDecompiler::Assign(Operation operation) { | |||
| 1515 | ResetTemporaries(); | 1551 | ResetTemporaries(); |
| 1516 | return {}; | 1552 | return {}; |
| 1517 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | 1553 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { |
| 1518 | const std::string temporary = AllocTemporary(); | 1554 | AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); |
| 1519 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), | ||
| 1520 | Visit(gmem->GetBaseAddress())); | ||
| 1521 | AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()), | ||
| 1522 | temporary); | ||
| 1523 | ResetTemporaries(); | 1555 | ResetTemporaries(); |
| 1524 | return {}; | 1556 | return {}; |
| 1525 | } else { | 1557 | } else { |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index e461e4c70..e866d8f2f 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -26,7 +26,7 @@ Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) | |||
| 26 | : VideoCommon::BufferBlock{cpu_addr, size} { | 26 | : VideoCommon::BufferBlock{cpu_addr, size} { |
| 27 | gl_buffer.Create(); | 27 | gl_buffer.Create(); |
| 28 | glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); | 28 | glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); |
| 29 | if (device.HasVertexBufferUnifiedMemory()) { | 29 | if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { |
| 30 | glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); | 30 | glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); |
| 31 | glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); | 31 | glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); |
| 32 | } | 32 | } |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c3fad563c..03e82c599 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -139,6 +139,18 @@ void oglEnable(GLenum cap, bool state) { | |||
| 139 | (state ? glEnable : glDisable)(cap); | 139 | (state ? glEnable : glDisable)(cap); |
| 140 | } | 140 | } |
| 141 | 141 | ||
| 142 | void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) { | ||
| 143 | if (num_entries == 0) { | ||
| 144 | return; | ||
| 145 | } | ||
| 146 | if (num_entries % 2 == 1) { | ||
| 147 | pointers[num_entries] = 0; | ||
| 148 | } | ||
| 149 | const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2); | ||
| 150 | glProgramLocalParametersI4uivNV(target, 0, num_vectors, | ||
| 151 | reinterpret_cast<const GLuint*>(pointers)); | ||
| 152 | } | ||
| 153 | |||
| 142 | } // Anonymous namespace | 154 | } // Anonymous namespace |
| 143 | 155 | ||
| 144 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, | 156 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, |
| @@ -324,7 +336,6 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { | |||
| 324 | void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | 336 | void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { |
| 325 | MICROPROFILE_SCOPE(OpenGL_Shader); | 337 | MICROPROFILE_SCOPE(OpenGL_Shader); |
| 326 | auto& gpu = system.GPU().Maxwell3D(); | 338 | auto& gpu = system.GPU().Maxwell3D(); |
| 327 | std::size_t num_ssbos = 0; | ||
| 328 | u32 clip_distances = 0; | 339 | u32 clip_distances = 0; |
| 329 | 340 | ||
| 330 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 341 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| @@ -347,29 +358,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 347 | } | 358 | } |
| 348 | 359 | ||
| 349 | // Currently this stages are not supported in the OpenGL backend. | 360 | // Currently this stages are not supported in the OpenGL backend. |
| 350 | // Todo(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL | 361 | // TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL |
| 351 | if (program == Maxwell::ShaderProgram::TesselationControl) { | 362 | if (program == Maxwell::ShaderProgram::TesselationControl || |
| 363 | program == Maxwell::ShaderProgram::TesselationEval) { | ||
| 352 | continue; | 364 | continue; |
| 353 | } else if (program == Maxwell::ShaderProgram::TesselationEval) { | ||
| 354 | continue; | ||
| 355 | } | ||
| 356 | |||
| 357 | Shader* shader = shader_cache.GetStageProgram(program, async_shaders); | ||
| 358 | |||
| 359 | if (device.UseAssemblyShaders()) { | ||
| 360 | // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this | ||
| 361 | // all stages share the same bindings. | ||
| 362 | const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size(); | ||
| 363 | ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage"); | ||
| 364 | num_ssbos += num_stage_ssbos; | ||
| 365 | } | 365 | } |
| 366 | 366 | ||
| 367 | // Stage indices are 0 - 5 | 367 | Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); |
| 368 | const std::size_t stage = index == 0 ? 0 : index - 1; | ||
| 369 | SetupDrawConstBuffers(stage, shader); | ||
| 370 | SetupDrawGlobalMemory(stage, shader); | ||
| 371 | SetupDrawTextures(stage, shader); | ||
| 372 | SetupDrawImages(stage, shader); | ||
| 373 | 368 | ||
| 374 | const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; | 369 | const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; |
| 375 | switch (program) { | 370 | switch (program) { |
| @@ -388,6 +383,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 388 | shader_config.enable.Value(), shader_config.offset); | 383 | shader_config.enable.Value(), shader_config.offset); |
| 389 | } | 384 | } |
| 390 | 385 | ||
| 386 | // Stage indices are 0 - 5 | ||
| 387 | const std::size_t stage = index == 0 ? 0 : index - 1; | ||
| 388 | SetupDrawConstBuffers(stage, shader); | ||
| 389 | SetupDrawGlobalMemory(stage, shader); | ||
| 390 | SetupDrawTextures(stage, shader); | ||
| 391 | SetupDrawImages(stage, shader); | ||
| 392 | |||
| 391 | // Workaround for Intel drivers. | 393 | // Workaround for Intel drivers. |
| 392 | // When a clip distance is enabled but not set in the shader it crops parts of the screen | 394 | // When a clip distance is enabled but not set in the shader it crops parts of the screen |
| 393 | // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the | 395 | // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the |
| @@ -749,6 +751,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | |||
| 749 | current_cbuf = 0; | 751 | current_cbuf = 0; |
| 750 | 752 | ||
| 751 | auto kernel = shader_cache.GetComputeKernel(code_addr); | 753 | auto kernel = shader_cache.GetComputeKernel(code_addr); |
| 754 | program_manager.BindCompute(kernel->GetHandle()); | ||
| 755 | |||
| 752 | SetupComputeTextures(kernel); | 756 | SetupComputeTextures(kernel); |
| 753 | SetupComputeImages(kernel); | 757 | SetupComputeImages(kernel); |
| 754 | 758 | ||
| @@ -763,7 +767,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | |||
| 763 | buffer_cache.Unmap(); | 767 | buffer_cache.Unmap(); |
| 764 | 768 | ||
| 765 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | 769 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; |
| 766 | program_manager.BindCompute(kernel->GetHandle()); | ||
| 767 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); | 770 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); |
| 768 | ++num_queued_commands; | 771 | ++num_queued_commands; |
| 769 | } | 772 | } |
| @@ -1023,40 +1026,66 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, | |||
| 1023 | } | 1026 | } |
| 1024 | 1027 | ||
| 1025 | void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) { | 1028 | void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) { |
| 1029 | static constexpr std::array TARGET_LUT = { | ||
| 1030 | GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, | ||
| 1031 | GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, | ||
| 1032 | }; | ||
| 1033 | |||
| 1026 | auto& gpu{system.GPU()}; | 1034 | auto& gpu{system.GPU()}; |
| 1027 | auto& memory_manager{gpu.MemoryManager()}; | 1035 | auto& memory_manager{gpu.MemoryManager()}; |
| 1028 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; | 1036 | const auto& cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; |
| 1037 | const auto& entries{shader->GetEntries().global_memory_entries}; | ||
| 1038 | |||
| 1039 | std::array<GLuint64EXT, 32> pointers; | ||
| 1040 | ASSERT(entries.size() < pointers.size()); | ||
| 1029 | 1041 | ||
| 1030 | u32 binding = | 1042 | const bool assembly_shaders = device.UseAssemblyShaders(); |
| 1031 | device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; | 1043 | u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; |
| 1032 | for (const auto& entry : shader->GetEntries().global_memory_entries) { | 1044 | for (const auto& entry : entries) { |
| 1033 | const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; | 1045 | const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; |
| 1034 | const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; | 1046 | const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; |
| 1035 | const u32 size{memory_manager.Read<u32>(addr + 8)}; | 1047 | const u32 size{memory_manager.Read<u32>(addr + 8)}; |
| 1036 | SetupGlobalMemory(binding++, entry, gpu_addr, size); | 1048 | SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); |
| 1049 | ++binding; | ||
| 1050 | } | ||
| 1051 | if (assembly_shaders) { | ||
| 1052 | UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size()); | ||
| 1037 | } | 1053 | } |
| 1038 | } | 1054 | } |
| 1039 | 1055 | ||
| 1040 | void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { | 1056 | void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { |
| 1041 | auto& gpu{system.GPU()}; | 1057 | auto& gpu{system.GPU()}; |
| 1042 | auto& memory_manager{gpu.MemoryManager()}; | 1058 | auto& memory_manager{gpu.MemoryManager()}; |
| 1043 | const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; | 1059 | const auto& cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; |
| 1060 | const auto& entries{kernel->GetEntries().global_memory_entries}; | ||
| 1061 | |||
| 1062 | std::array<GLuint64EXT, 32> pointers; | ||
| 1063 | ASSERT(entries.size() < pointers.size()); | ||
| 1044 | 1064 | ||
| 1045 | u32 binding = 0; | 1065 | u32 binding = 0; |
| 1046 | for (const auto& entry : kernel->GetEntries().global_memory_entries) { | 1066 | for (const auto& entry : entries) { |
| 1047 | const auto addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; | 1067 | const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; |
| 1048 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; | 1068 | const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; |
| 1049 | const auto size{memory_manager.Read<u32>(addr + 8)}; | 1069 | const u32 size{memory_manager.Read<u32>(addr + 8)}; |
| 1050 | SetupGlobalMemory(binding++, entry, gpu_addr, size); | 1070 | SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); |
| 1071 | ++binding; | ||
| 1072 | } | ||
| 1073 | if (device.UseAssemblyShaders()) { | ||
| 1074 | UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size()); | ||
| 1051 | } | 1075 | } |
| 1052 | } | 1076 | } |
| 1053 | 1077 | ||
| 1054 | void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, | 1078 | void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, |
| 1055 | GPUVAddr gpu_addr, std::size_t size) { | 1079 | GPUVAddr gpu_addr, std::size_t size, |
| 1056 | const auto alignment{device.GetShaderStorageBufferAlignment()}; | 1080 | GLuint64EXT* pointer) { |
| 1081 | const std::size_t alignment{device.GetShaderStorageBufferAlignment()}; | ||
| 1057 | const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); | 1082 | const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); |
| 1058 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, | 1083 | if (device.UseAssemblyShaders()) { |
| 1059 | static_cast<GLsizeiptr>(size)); | 1084 | *pointer = info.address + info.offset; |
| 1085 | } else { | ||
| 1086 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, | ||
| 1087 | static_cast<GLsizeiptr>(size)); | ||
| 1088 | } | ||
| 1060 | } | 1089 | } |
| 1061 | 1090 | ||
| 1062 | void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { | 1091 | void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index a95646936..ccc6f50f6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -124,9 +124,9 @@ private: | |||
| 124 | /// Configures the current global memory entries to use for the kernel invocation. | 124 | /// Configures the current global memory entries to use for the kernel invocation. |
| 125 | void SetupComputeGlobalMemory(Shader* kernel); | 125 | void SetupComputeGlobalMemory(Shader* kernel); |
| 126 | 126 | ||
| 127 | /// Configures a constant buffer. | 127 | /// Configures a global memory buffer. |
| 128 | void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, | 128 | void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, |
| 129 | std::size_t size); | 129 | std::size_t size, GLuint64EXT* pointer); |
| 130 | 130 | ||
| 131 | /// Configures the current textures to use for the draw command. | 131 | /// Configures the current textures to use for the draw command. |
| 132 | void SetupDrawTextures(std::size_t stage_index, Shader* shader); | 132 | void SetupDrawTextures(std::size_t stage_index, Shader* shader); |
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 8e754fa90..691c6c79b 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp | |||
| @@ -11,8 +11,30 @@ | |||
| 11 | 11 | ||
| 12 | namespace OpenGL { | 12 | namespace OpenGL { |
| 13 | 13 | ||
| 14 | ProgramManager::ProgramManager(const Device& device) { | 14 | namespace { |
| 15 | use_assembly_programs = device.UseAssemblyShaders(); | 15 | |
| 16 | void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) { | ||
| 17 | if (current == old) { | ||
| 18 | return; | ||
| 19 | } | ||
| 20 | if (current == 0) { | ||
| 21 | if (enabled) { | ||
| 22 | enabled = false; | ||
| 23 | glDisable(stage); | ||
| 24 | } | ||
| 25 | return; | ||
| 26 | } | ||
| 27 | if (!enabled) { | ||
| 28 | enabled = true; | ||
| 29 | glEnable(stage); | ||
| 30 | } | ||
| 31 | glBindProgramARB(stage, current); | ||
| 32 | } | ||
| 33 | |||
| 34 | } // Anonymous namespace | ||
| 35 | |||
| 36 | ProgramManager::ProgramManager(const Device& device) | ||
| 37 | : use_assembly_programs{device.UseAssemblyShaders()} { | ||
| 16 | if (use_assembly_programs) { | 38 | if (use_assembly_programs) { |
| 17 | glEnable(GL_COMPUTE_PROGRAM_NV); | 39 | glEnable(GL_COMPUTE_PROGRAM_NV); |
| 18 | } else { | 40 | } else { |
| @@ -33,9 +55,7 @@ void ProgramManager::BindCompute(GLuint program) { | |||
| 33 | } | 55 | } |
| 34 | 56 | ||
| 35 | void ProgramManager::BindGraphicsPipeline() { | 57 | void ProgramManager::BindGraphicsPipeline() { |
| 36 | if (use_assembly_programs) { | 58 | if (!use_assembly_programs) { |
| 37 | UpdateAssemblyPrograms(); | ||
| 38 | } else { | ||
| 39 | UpdateSourcePrograms(); | 59 | UpdateSourcePrograms(); |
| 40 | } | 60 | } |
| 41 | } | 61 | } |
| @@ -63,32 +83,25 @@ void ProgramManager::RestoreGuestPipeline() { | |||
| 63 | } | 83 | } |
| 64 | } | 84 | } |
| 65 | 85 | ||
| 66 | void ProgramManager::UpdateAssemblyPrograms() { | 86 | void ProgramManager::UseVertexShader(GLuint program) { |
| 67 | const auto update_state = [](GLenum stage, bool& enabled, GLuint current, GLuint old) { | 87 | if (use_assembly_programs) { |
| 68 | if (current == old) { | 88 | BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); |
| 69 | return; | 89 | } |
| 70 | } | 90 | current_state.vertex = program; |
| 71 | if (current == 0) { | 91 | } |
| 72 | if (enabled) { | ||
| 73 | enabled = false; | ||
| 74 | glDisable(stage); | ||
| 75 | } | ||
| 76 | return; | ||
| 77 | } | ||
| 78 | if (!enabled) { | ||
| 79 | enabled = true; | ||
| 80 | glEnable(stage); | ||
| 81 | } | ||
| 82 | glBindProgramARB(stage, current); | ||
| 83 | }; | ||
| 84 | 92 | ||
| 85 | update_state(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex); | 93 | void ProgramManager::UseGeometryShader(GLuint program) { |
| 86 | update_state(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry, | 94 | if (use_assembly_programs) { |
| 87 | old_state.geometry); | 95 | BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled); |
| 88 | update_state(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment, | 96 | } |
| 89 | old_state.fragment); | 97 | current_state.geometry = program; |
| 98 | } | ||
| 90 | 99 | ||
| 91 | old_state = current_state; | 100 | void ProgramManager::UseFragmentShader(GLuint program) { |
| 101 | if (use_assembly_programs) { | ||
| 102 | BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled); | ||
| 103 | } | ||
| 104 | current_state.fragment = program; | ||
| 92 | } | 105 | } |
| 93 | 106 | ||
| 94 | void ProgramManager::UpdateSourcePrograms() { | 107 | void ProgramManager::UpdateSourcePrograms() { |
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 0f03b4f12..950e0dfcb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h | |||
| @@ -45,17 +45,9 @@ public: | |||
| 45 | /// Rewinds BindHostPipeline state changes. | 45 | /// Rewinds BindHostPipeline state changes. |
| 46 | void RestoreGuestPipeline(); | 46 | void RestoreGuestPipeline(); |
| 47 | 47 | ||
| 48 | void UseVertexShader(GLuint program) { | 48 | void UseVertexShader(GLuint program); |
| 49 | current_state.vertex = program; | 49 | void UseGeometryShader(GLuint program); |
| 50 | } | 50 | void UseFragmentShader(GLuint program); |
| 51 | |||
| 52 | void UseGeometryShader(GLuint program) { | ||
| 53 | current_state.geometry = program; | ||
| 54 | } | ||
| 55 | |||
| 56 | void UseFragmentShader(GLuint program) { | ||
| 57 | current_state.fragment = program; | ||
| 58 | } | ||
| 59 | 51 | ||
| 60 | private: | 52 | private: |
| 61 | struct PipelineState { | 53 | struct PipelineState { |
| @@ -64,9 +56,6 @@ private: | |||
| 64 | GLuint fragment = 0; | 56 | GLuint fragment = 0; |
| 65 | }; | 57 | }; |
| 66 | 58 | ||
| 67 | /// Update NV_gpu_program5 programs. | ||
| 68 | void UpdateAssemblyPrograms(); | ||
| 69 | |||
| 70 | /// Update GLSL programs. | 59 | /// Update GLSL programs. |
| 71 | void UpdateSourcePrograms(); | 60 | void UpdateSourcePrograms(); |
| 72 | 61 | ||
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 3655ff629..887995cf4 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp | |||
| @@ -35,7 +35,7 @@ OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool ver | |||
| 35 | mapped_ptr = static_cast<u8*>( | 35 | mapped_ptr = static_cast<u8*>( |
| 36 | glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); | 36 | glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); |
| 37 | 37 | ||
| 38 | if (device.HasVertexBufferUnifiedMemory()) { | 38 | if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { |
| 39 | glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); | 39 | glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); |
| 40 | glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); | 40 | glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); |
| 41 | } | 41 | } |