diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_arb_decompiler.cpp | 31 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 47 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 9 |
3 files changed, 54 insertions, 33 deletions
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
index b7e9ed2e9..f4db62787 100644
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
| @@ -376,9 +376,11 @@ private: | |||
| 376 | std::string temporary = AllocTemporary(); | 376 | std::string temporary = AllocTemporary(); |
| 377 | std::string address; | 377 | std::string address; |
| 378 | std::string_view opname; | 378 | std::string_view opname; |
| 379 | bool robust = false; | ||
| 379 | if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { | 380 | if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { |
| 380 | address = GlobalMemoryPointer(*gmem); | 381 | address = GlobalMemoryPointer(*gmem); |
| 381 | opname = "ATOM"; | 382 | opname = "ATOM"; |
| 383 | robust = true; | ||
| 382 | } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { | 384 | } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { |
| 383 | address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); | 385 | address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); |
| 384 | opname = "ATOMS"; | 386 | opname = "ATOMS"; |
| @@ -386,7 +388,15 @@ private: | |||
| 386 | UNREACHABLE(); | 388 | UNREACHABLE(); |
| 387 | return "{0, 0, 0, 0}"; | 389 | return "{0, 0, 0, 0}"; |
| 388 | } | 390 | } |
| 391 | if (robust) { | ||
| 392 | AddLine("IF NE.x;"); | ||
| 393 | } | ||
| 389 | AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); | 394 | AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); |
| 395 | if (robust) { | ||
| 396 | AddLine("ELSE;"); | ||
| 397 | AddLine("MOV.S {}, 0;", temporary); | ||
| 398 | AddLine("ENDIF;"); | ||
| 399 | } | ||
| 390 | return temporary; | 400 | return temporary; |
| 391 | } | 401 | } |
| 392 | 402 | ||
| @@ -980,10 +990,9 @@ void ARBDecompiler::DeclareLocalMemory() { | |||
| 980 | } | 990 | } |
| 981 | 991 | ||
| 982 | void ARBDecompiler::DeclareGlobalMemory() { | 992 | void ARBDecompiler::DeclareGlobalMemory() { |
| 983 | const std::size_t num_entries = ir.GetGlobalMemory().size(); | 993 | const size_t num_entries = ir.GetGlobalMemory().size(); |
| 984 | if (num_entries > 0) { | 994 | if (num_entries > 0) { |
| 985 | const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2; | 995 | AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1); |
| 986 | AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1); | ||
| 987 | } | 996 | } |
| 988 | } | 997 | } |
| 989 | 998 | ||
| @@ -1363,7 +1372,8 @@ std::string ARBDecompiler::Visit(const Node& node) { | |||
| 1363 | 1372 | ||
| 1364 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { | 1373 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { |
| 1365 | std::string temporary = AllocTemporary(); | 1374 | std::string temporary = AllocTemporary(); |
| 1366 | AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem)); | 1375 | AddLine("MOV {}, 0;", temporary); |
| 1376 | AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem)); | ||
| 1367 | return temporary; | 1377 | return temporary; |
| 1368 | } | 1378 | } |
| 1369 | 1379 | ||
| @@ -1441,18 +1451,21 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) { | |||
| 1441 | } | 1451 | } |
| 1442 | 1452 | ||
| 1443 | std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { | 1453 | std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) { |
| 1454 | // Read a bindless SSBO, return its address and set CC accordingly | ||
| 1455 | // address = c[binding].xy | ||
| 1456 | // length = c[binding].z | ||
| 1444 | const u32 binding = global_memory_names.at(gmem.GetDescriptor()); | 1457 | const u32 binding = global_memory_names.at(gmem.GetDescriptor()); |
| 1445 | const char result_swizzle = binding % 2 == 0 ? 'x' : 'y'; | ||
| 1446 | 1458 | ||
| 1447 | const std::string pointer = AllocLongVectorTemporary(); | 1459 | const std::string pointer = AllocLongVectorTemporary(); |
| 1448 | std::string temporary = AllocTemporary(); | 1460 | std::string temporary = AllocTemporary(); |
| 1449 | 1461 | ||
| 1450 | const u32 local_index = binding / 2; | 1462 | AddLine("PK64.U {}, c[{}];", pointer, binding); |
| 1451 | AddLine("PK64.U {}, c[{}];", pointer, local_index); | ||
| 1452 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), | 1463 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()), |
| 1453 | Visit(gmem.GetBaseAddress())); | 1464 | Visit(gmem.GetBaseAddress())); |
| 1454 | AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); | 1465 | AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary); |
| 1455 | AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer); | 1466 | AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer); |
| 1467 | // Compare offset to length and set CC | ||
| 1468 | AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding); | ||
| 1456 | return fmt::format("{}.x", pointer); | 1469 | return fmt::format("{}.x", pointer); |
| 1457 | } | 1470 | } |
| 1458 | 1471 | ||
| @@ -1552,7 +1565,9 @@ std::string ARBDecompiler::Assign(Operation operation) { | |||
| 1552 | ResetTemporaries(); | 1565 | ResetTemporaries(); |
| 1553 | return {}; | 1566 | return {}; |
| 1554 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | 1567 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { |
| 1568 | AddLine("IF NE.x;"); | ||
| 1555 | AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); | 1569 | AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem)); |
| 1570 | AddLine("ENDIF;"); | ||
| 1556 | ResetTemporaries(); | 1571 | ResetTemporaries(); |
| 1557 | return {}; | 1572 | return {}; |
| 1558 | } else { | 1573 | } else { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index bbb2eb17c..36bf92808 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
| @@ -139,16 +139,12 @@ void oglEnable(GLenum cap, bool state) { | |||
| 139 | (state ? glEnable : glDisable)(cap); | 139 | (state ? glEnable : glDisable)(cap); |
| 140 | } | 140 | } |
| 141 | 141 | ||
| 142 | void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) { | 142 | void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) { |
| 143 | if (num_entries == 0) { | 143 | if (num_ssbos == 0) { |
| 144 | return; | 144 | return; |
| 145 | } | 145 | } |
| 146 | if (num_entries % 2 == 1) { | 146 | glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos), |
| 147 | pointers[num_entries] = 0; | 147 | reinterpret_cast<const GLuint*>(ssbos)); |
| 148 | } | ||
| 149 | const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2); | ||
| 150 | glProgramLocalParametersI4uivNV(target, 0, num_vectors, | ||
| 151 | reinterpret_cast<const GLuint*>(pointers)); | ||
| 152 | } | 148 | } |
| 153 | 149 | ||
| 154 | } // Anonymous namespace | 150 | } // Anonymous namespace |
| @@ -900,11 +896,11 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 900 | } | 896 | } |
| 901 | 897 | ||
| 902 | void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { | 898 | void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { |
| 903 | static constexpr std::array PARAMETER_LUT = { | 899 | static constexpr std::array PARAMETER_LUT{ |
| 904 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, | 900 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, |
| 905 | GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, | 901 | GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, |
| 906 | GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV}; | 902 | GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV, |
| 907 | 903 | }; | |
| 908 | MICROPROFILE_SCOPE(OpenGL_UBO); | 904 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 909 | const auto& stages = maxwell3d.state.shader_stages; | 905 | const auto& stages = maxwell3d.state.shader_stages; |
| 910 | const auto& shader_stage = stages[stage_index]; | 906 | const auto& shader_stage = stages[stage_index]; |
| @@ -1007,8 +1003,8 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh | |||
| 1007 | const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; | 1003 | const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; |
| 1008 | const auto& entries{shader->GetEntries().global_memory_entries}; | 1004 | const auto& entries{shader->GetEntries().global_memory_entries}; |
| 1009 | 1005 | ||
| 1010 | std::array<GLuint64EXT, 32> pointers; | 1006 | std::array<BindlessSSBO, 32> ssbos; |
| 1011 | ASSERT(entries.size() < pointers.size()); | 1007 | ASSERT(entries.size() < ssbos.size()); |
| 1012 | 1008 | ||
| 1013 | const bool assembly_shaders = device.UseAssemblyShaders(); | 1009 | const bool assembly_shaders = device.UseAssemblyShaders(); |
| 1014 | u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; | 1010 | u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; |
| @@ -1016,11 +1012,11 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh | |||
| 1016 | const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; | 1012 | const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; |
| 1017 | const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; | 1013 | const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; |
| 1018 | const u32 size{gpu_memory.Read<u32>(addr + 8)}; | 1014 | const u32 size{gpu_memory.Read<u32>(addr + 8)}; |
| 1019 | SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); | 1015 | SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]); |
| 1020 | ++binding; | 1016 | ++binding; |
| 1021 | } | 1017 | } |
| 1022 | if (assembly_shaders) { | 1018 | if (assembly_shaders) { |
| 1023 | UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size()); | 1019 | UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size()); |
| 1024 | } | 1020 | } |
| 1025 | } | 1021 | } |
| 1026 | 1022 | ||
| @@ -1028,29 +1024,32 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { | |||
| 1028 | const auto& cbufs{kepler_compute.launch_description.const_buffer_config}; | 1024 | const auto& cbufs{kepler_compute.launch_description.const_buffer_config}; |
| 1029 | const auto& entries{kernel->GetEntries().global_memory_entries}; | 1025 | const auto& entries{kernel->GetEntries().global_memory_entries}; |
| 1030 | 1026 | ||
| 1031 | std::array<GLuint64EXT, 32> pointers; | 1027 | std::array<BindlessSSBO, 32> ssbos; |
| 1032 | ASSERT(entries.size() < pointers.size()); | 1028 | ASSERT(entries.size() < ssbos.size()); |
| 1033 | 1029 | ||
| 1034 | u32 binding = 0; | 1030 | u32 binding = 0; |
| 1035 | for (const auto& entry : entries) { | 1031 | for (const auto& entry : entries) { |
| 1036 | const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; | 1032 | const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; |
| 1037 | const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; | 1033 | const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; |
| 1038 | const u32 size{gpu_memory.Read<u32>(addr + 8)}; | 1034 | const u32 size{gpu_memory.Read<u32>(addr + 8)}; |
| 1039 | SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); | 1035 | SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]); |
| 1040 | ++binding; | 1036 | ++binding; |
| 1041 | } | 1037 | } |
| 1042 | if (device.UseAssemblyShaders()) { | 1038 | if (device.UseAssemblyShaders()) { |
| 1043 | UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size()); | 1039 | UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size()); |
| 1044 | } | 1040 | } |
| 1045 | } | 1041 | } |
| 1046 | 1042 | ||
| 1047 | void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, | 1043 | void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, |
| 1048 | GPUVAddr gpu_addr, std::size_t size, | 1044 | GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) { |
| 1049 | GLuint64EXT* pointer) { | 1045 | const size_t alignment{device.GetShaderStorageBufferAlignment()}; |
| 1050 | const std::size_t alignment{device.GetShaderStorageBufferAlignment()}; | ||
| 1051 | const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); | 1046 | const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); |
| 1052 | if (device.UseAssemblyShaders()) { | 1047 | if (device.UseAssemblyShaders()) { |
| 1053 | *pointer = info.address + info.offset; | 1048 | *ssbo = BindlessSSBO{ |
| 1049 | .address = static_cast<GLuint64EXT>(info.address + info.offset), | ||
| 1050 | .length = static_cast<GLsizei>(size), | ||
| 1051 | .padding = 0, | ||
| 1052 | }; | ||
| 1054 | } else { | 1053 | } else { |
| 1055 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, | 1054 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, |
| 1056 | static_cast<GLsizeiptr>(size)); | 1055 | static_cast<GLsizeiptr>(size)); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index f451404b2..1d0f585fa 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
| @@ -53,6 +53,13 @@ namespace OpenGL { | |||
| 53 | struct ScreenInfo; | 53 | struct ScreenInfo; |
| 54 | struct DrawParameters; | 54 | struct DrawParameters; |
| 55 | 55 | ||
| 56 | struct BindlessSSBO { | ||
| 57 | GLuint64EXT address; | ||
| 58 | GLsizei length; | ||
| 59 | GLsizei padding; | ||
| 60 | }; | ||
| 61 | static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128); | ||
| 62 | |||
| 56 | class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { | 63 | class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { |
| 57 | public: | 64 | public: |
| 58 | explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, | 65 | explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, |
| @@ -126,7 +133,7 @@ private: | |||
| 126 | 133 | ||
| 127 | /// Configures a global memory buffer. | 134 | /// Configures a global memory buffer. |
| 128 | void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, | 135 | void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, |
| 129 | std::size_t size, GLuint64EXT* pointer); | 136 | size_t size, BindlessSSBO* ssbo); |
| 130 | 137 | ||
| 131 | /// Configures the current textures to use for the draw command. | 138 | /// Configures the current textures to use for the draw command. |
| 132 | void SetupDrawTextures(std::size_t stage_index, Shader* shader); | 139 | void SetupDrawTextures(std::size_t stage_index, Shader* shader); |