diff options
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 1 | ||||
| -rw-r--r-- | src/video_core/engines/shader_type.h | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 42 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 21 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 94 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 44 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 29 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.h | 44 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 31 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 14 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/utils.cpp | 32 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/utils.h | 18 |
16 files changed, 200 insertions, 192 deletions
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 72994f4d2..c8dd362ab 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -63,7 +63,6 @@ public: | |||
| 63 | static constexpr std::size_t NumVertexArrays = 32; | 63 | static constexpr std::size_t NumVertexArrays = 32; |
| 64 | static constexpr std::size_t NumVertexAttributes = 32; | 64 | static constexpr std::size_t NumVertexAttributes = 32; |
| 65 | static constexpr std::size_t NumVaryings = 31; | 65 | static constexpr std::size_t NumVaryings = 31; |
| 66 | static constexpr std::size_t NumTextureSamplers = 32; | ||
| 67 | static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number | 66 | static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number |
| 68 | static constexpr std::size_t NumClipDistances = 8; | 67 | static constexpr std::size_t NumClipDistances = 8; |
| 69 | static constexpr std::size_t MaxShaderProgram = 6; | 68 | static constexpr std::size_t MaxShaderProgram = 6; |
diff --git a/src/video_core/engines/shader_type.h b/src/video_core/engines/shader_type.h index 239196ba9..49ce5cde5 100644 --- a/src/video_core/engines/shader_type.h +++ b/src/video_core/engines/shader_type.h | |||
| @@ -16,5 +16,6 @@ enum class ShaderType : u32 { | |||
| 16 | Fragment = 4, | 16 | Fragment = 4, |
| 17 | Compute = 5, | 17 | Compute = 5, |
| 18 | }; | 18 | }; |
| 19 | static constexpr std::size_t MaxShaderTypes = 6; | ||
| 19 | 20 | ||
| 20 | } // namespace Tegra::Engines | 21 | } // namespace Tegra::Engines |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index b30d5be74..5cfa97fc2 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -17,6 +17,9 @@ namespace OpenGL { | |||
| 17 | 17 | ||
| 18 | namespace { | 18 | namespace { |
| 19 | 19 | ||
| 20 | // One uniform block is reserved for emulation purposes | ||
| 21 | constexpr u32 ReservedUniformBlocks = 1; | ||
| 22 | |||
| 20 | template <typename T> | 23 | template <typename T> |
| 21 | T GetInteger(GLenum pname) { | 24 | T GetInteger(GLenum pname) { |
| 22 | GLint temporary; | 25 | GLint temporary; |
| @@ -48,6 +51,22 @@ bool HasExtension(const std::vector<std::string_view>& images, std::string_view | |||
| 48 | return std::find(images.begin(), images.end(), extension) != images.end(); | 51 | return std::find(images.begin(), images.end(), extension) != images.end(); |
| 49 | } | 52 | } |
| 50 | 53 | ||
| 54 | constexpr Device::BaseBindings operator+(Device::BaseBindings lhs, Device::BaseBindings rhs) { | ||
| 55 | return Device::BaseBindings{lhs.uniform_buffer + rhs.uniform_buffer, | ||
| 56 | lhs.shader_storage_buffer + rhs.shader_storage_buffer, | ||
| 57 | lhs.sampler + rhs.sampler, lhs.image + rhs.image}; | ||
| 58 | } | ||
| 59 | |||
| 60 | Device::BaseBindings BuildBaseBindings(GLenum uniform_blocks, GLenum shader_storage_blocks, | ||
| 61 | GLenum texture_image_units, GLenum image_uniforms) noexcept { | ||
| 62 | return Device::BaseBindings{ | ||
| 63 | GetInteger<u32>(uniform_blocks) - ReservedUniformBlocks, | ||
| 64 | GetInteger<u32>(shader_storage_blocks), | ||
| 65 | GetInteger<u32>(texture_image_units), | ||
| 66 | GetInteger<u32>(image_uniforms), | ||
| 67 | }; | ||
| 68 | } | ||
| 69 | |||
| 51 | } // Anonymous namespace | 70 | } // Anonymous namespace |
| 52 | 71 | ||
| 53 | Device::Device() { | 72 | Device::Device() { |
| @@ -56,6 +75,29 @@ Device::Device() { | |||
| 56 | 75 | ||
| 57 | const bool is_nvidia = vendor == "NVIDIA Corporation"; | 76 | const bool is_nvidia = vendor == "NVIDIA Corporation"; |
| 58 | 77 | ||
| 78 | // Reserve the first UBO for emulation bindings | ||
| 79 | base_bindings[0] = BaseBindings{ReservedUniformBlocks, 0, 0, 0}; | ||
| 80 | base_bindings[1] = base_bindings[0] + BuildBaseBindings(GL_MAX_VERTEX_UNIFORM_BLOCKS, | ||
| 81 | GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, | ||
| 82 | GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, | ||
| 83 | GL_MAX_VERTEX_IMAGE_UNIFORMS); | ||
| 84 | base_bindings[2] = | ||
| 85 | base_bindings[1] + BuildBaseBindings(GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, | ||
| 86 | GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, | ||
| 87 | GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, | ||
| 88 | GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS); | ||
| 89 | base_bindings[3] = | ||
| 90 | base_bindings[2] + BuildBaseBindings(GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, | ||
| 91 | GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, | ||
| 92 | GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, | ||
| 93 | GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS); | ||
| 94 | base_bindings[4] = base_bindings[3] + BuildBaseBindings(GL_MAX_GEOMETRY_UNIFORM_BLOCKS, | ||
| 95 | GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, | ||
| 96 | GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, | ||
| 97 | GL_MAX_GEOMETRY_IMAGE_UNIFORMS); | ||
| 98 | // Compute doesn't need any of that | ||
| 99 | base_bindings[5] = BaseBindings{0, 0, 0, 0}; | ||
| 100 | |||
| 59 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | 101 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); |
| 60 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | 102 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); |
| 61 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); | 103 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 6c86fe207..e7d3c48b0 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -6,14 +6,32 @@ | |||
| 6 | 6 | ||
| 7 | #include <cstddef> | 7 | #include <cstddef> |
| 8 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 9 | #include "video_core/engines/shader_type.h" | ||
| 9 | 10 | ||
| 10 | namespace OpenGL { | 11 | namespace OpenGL { |
| 11 | 12 | ||
| 12 | class Device { | 13 | static constexpr u32 EmulationUniformBlockBinding = 0; |
| 14 | |||
| 15 | class Device final { | ||
| 13 | public: | 16 | public: |
| 17 | struct BaseBindings final { | ||
| 18 | u32 uniform_buffer{}; | ||
| 19 | u32 shader_storage_buffer{}; | ||
| 20 | u32 sampler{}; | ||
| 21 | u32 image{}; | ||
| 22 | }; | ||
| 23 | |||
| 14 | explicit Device(); | 24 | explicit Device(); |
| 15 | explicit Device(std::nullptr_t); | 25 | explicit Device(std::nullptr_t); |
| 16 | 26 | ||
| 27 | const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { | ||
| 28 | return base_bindings[stage_index]; | ||
| 29 | } | ||
| 30 | |||
| 31 | const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept { | ||
| 32 | return GetBaseBindings(static_cast<std::size_t>(shader_type)); | ||
| 33 | } | ||
| 34 | |||
| 17 | std::size_t GetUniformBufferAlignment() const { | 35 | std::size_t GetUniformBufferAlignment() const { |
| 18 | return uniform_buffer_alignment; | 36 | return uniform_buffer_alignment; |
| 19 | } | 37 | } |
| @@ -67,6 +85,7 @@ private: | |||
| 67 | static bool TestComponentIndexingBug(); | 85 | static bool TestComponentIndexingBug(); |
| 68 | static bool TestPreciseBug(); | 86 | static bool TestPreciseBug(); |
| 69 | 87 | ||
| 88 | std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings; | ||
| 70 | std::size_t uniform_buffer_alignment{}; | 89 | std::size_t uniform_buffer_alignment{}; |
| 71 | std::size_t shader_storage_alignment{}; | 90 | std::size_t shader_storage_alignment{}; |
| 72 | u32 max_vertex_attributes{}; | 91 | u32 max_vertex_attributes{}; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8baa73ebf..5c5ad1f6c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -258,7 +258,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 258 | MICROPROFILE_SCOPE(OpenGL_Shader); | 258 | MICROPROFILE_SCOPE(OpenGL_Shader); |
| 259 | auto& gpu = system.GPU().Maxwell3D(); | 259 | auto& gpu = system.GPU().Maxwell3D(); |
| 260 | 260 | ||
| 261 | BaseBindings base_bindings; | ||
| 262 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 261 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| 263 | 262 | ||
| 264 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 263 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| @@ -277,25 +276,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 277 | continue; | 276 | continue; |
| 278 | } | 277 | } |
| 279 | 278 | ||
| 280 | GLShader::MaxwellUniformData ubo{}; | ||
| 281 | ubo.SetFromRegs(gpu); | ||
| 282 | const auto [buffer, offset] = | ||
| 283 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); | ||
| 284 | |||
| 285 | // Bind the emulation info buffer | ||
| 286 | bind_ubo_pushbuffer.Push(buffer, offset, static_cast<GLsizeiptr>(sizeof(ubo))); | ||
| 287 | |||
| 288 | Shader shader{shader_cache.GetStageProgram(program)}; | 279 | Shader shader{shader_cache.GetStageProgram(program)}; |
| 289 | 280 | ||
| 290 | // Stage indices are 0 - 5 | 281 | // Stage indices are 0 - 5 |
| 291 | const std::size_t stage = index == 0 ? 0 : index - 1; | 282 | const std::size_t stage = index == 0 ? 0 : index - 1; |
| 292 | SetupDrawConstBuffers(stage, shader); | 283 | SetupDrawConstBuffers(stage, shader); |
| 293 | SetupDrawGlobalMemory(stage, shader); | 284 | SetupDrawGlobalMemory(stage, shader); |
| 294 | SetupDrawTextures(stage, shader, base_bindings); | 285 | SetupDrawTextures(stage, shader); |
| 295 | SetupDrawImages(stage, shader, base_bindings); | 286 | SetupDrawImages(stage, shader); |
| 296 | 287 | ||
| 297 | const ProgramVariant variant(base_bindings, primitive_mode); | 288 | const ProgramVariant variant(primitive_mode); |
| 298 | const auto [program_handle, next_bindings] = shader->GetHandle(variant); | 289 | const auto program_handle = shader->GetHandle(variant); |
| 299 | 290 | ||
| 300 | switch (program) { | 291 | switch (program) { |
| 301 | case Maxwell::ShaderProgram::VertexA: | 292 | case Maxwell::ShaderProgram::VertexA: |
| @@ -326,8 +317,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 326 | // VertexB was combined with VertexA, so we skip the VertexB iteration | 317 | // VertexB was combined with VertexA, so we skip the VertexB iteration |
| 327 | ++index; | 318 | ++index; |
| 328 | } | 319 | } |
| 329 | |||
| 330 | base_bindings = next_bindings; | ||
| 331 | } | 320 | } |
| 332 | 321 | ||
| 333 | SyncClipEnabled(clip_distances); | 322 | SyncClipEnabled(clip_distances); |
| @@ -612,8 +601,16 @@ void RasterizerOpenGL::DrawPrelude() { | |||
| 612 | index_buffer_offset = SetupIndexBuffer(); | 601 | index_buffer_offset = SetupIndexBuffer(); |
| 613 | 602 | ||
| 614 | // Prepare packed bindings. | 603 | // Prepare packed bindings. |
| 615 | bind_ubo_pushbuffer.Setup(0); | 604 | bind_ubo_pushbuffer.Setup(); |
| 616 | bind_ssbo_pushbuffer.Setup(0); | 605 | bind_ssbo_pushbuffer.Setup(); |
| 606 | |||
| 607 | // Setup emulation uniform buffer. | ||
| 608 | GLShader::MaxwellUniformData ubo; | ||
| 609 | ubo.SetFromRegs(gpu); | ||
| 610 | const auto [buffer, offset] = | ||
| 611 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); | ||
| 612 | bind_ubo_pushbuffer.Push(EmulationUniformBlockBinding, buffer, offset, | ||
| 613 | static_cast<GLsizeiptr>(sizeof(ubo))); | ||
| 617 | 614 | ||
| 618 | // Setup shaders and their used resources. | 615 | // Setup shaders and their used resources. |
| 619 | texture_cache.GuardSamplers(true); | 616 | texture_cache.GuardSamplers(true); |
| @@ -754,7 +751,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | |||
| 754 | const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, | 751 | const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, |
| 755 | launch_desc.block_dim_z, launch_desc.shared_alloc, | 752 | launch_desc.block_dim_z, launch_desc.shared_alloc, |
| 756 | launch_desc.local_pos_alloc); | 753 | launch_desc.local_pos_alloc); |
| 757 | std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant); | 754 | state.draw.shader_program = kernel->GetHandle(variant); |
| 758 | state.draw.program_pipeline = 0; | 755 | state.draw.program_pipeline = 0; |
| 759 | 756 | ||
| 760 | const std::size_t buffer_size = | 757 | const std::size_t buffer_size = |
| @@ -762,8 +759,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | |||
| 762 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | 759 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); |
| 763 | buffer_cache.Map(buffer_size); | 760 | buffer_cache.Map(buffer_size); |
| 764 | 761 | ||
| 765 | bind_ubo_pushbuffer.Setup(0); | 762 | bind_ubo_pushbuffer.Setup(); |
| 766 | bind_ssbo_pushbuffer.Setup(0); | 763 | bind_ssbo_pushbuffer.Setup(); |
| 767 | 764 | ||
| 768 | SetupComputeConstBuffers(kernel); | 765 | SetupComputeConstBuffers(kernel); |
| 769 | SetupComputeGlobalMemory(kernel); | 766 | SetupComputeGlobalMemory(kernel); |
| @@ -847,7 +844,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 847 | ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); | 844 | ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); |
| 848 | 845 | ||
| 849 | if (params.pixel_format != pixel_format) { | 846 | if (params.pixel_format != pixel_format) { |
| 850 | LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different"); | 847 | LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different"); |
| 851 | } | 848 | } |
| 852 | 849 | ||
| 853 | screen_info.display_texture = surface->GetTexture(); | 850 | screen_info.display_texture = surface->GetTexture(); |
| @@ -858,17 +855,21 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 858 | 855 | ||
| 859 | void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { | 856 | void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { |
| 860 | MICROPROFILE_SCOPE(OpenGL_UBO); | 857 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 858 | const u32 base_binding = device.GetBaseBindings(stage_index).uniform_buffer; | ||
| 861 | const auto& stages = system.GPU().Maxwell3D().state.shader_stages; | 859 | const auto& stages = system.GPU().Maxwell3D().state.shader_stages; |
| 862 | const auto& shader_stage = stages[stage_index]; | 860 | const auto& shader_stage = stages[stage_index]; |
| 861 | |||
| 863 | for (const auto& entry : shader->GetShaderEntries().const_buffers) { | 862 | for (const auto& entry : shader->GetShaderEntries().const_buffers) { |
| 864 | const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; | 863 | const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; |
| 865 | SetupConstBuffer(buffer, entry); | 864 | SetupConstBuffer(base_binding + entry.GetIndex(), buffer, entry); |
| 866 | } | 865 | } |
| 867 | } | 866 | } |
| 868 | 867 | ||
| 869 | void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { | 868 | void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { |
| 870 | MICROPROFILE_SCOPE(OpenGL_UBO); | 869 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 871 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | 870 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; |
| 871 | |||
| 872 | u32 binding = 0; | ||
| 872 | for (const auto& entry : kernel->GetShaderEntries().const_buffers) { | 873 | for (const auto& entry : kernel->GetShaderEntries().const_buffers) { |
| 873 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | 874 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; |
| 874 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); | 875 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); |
| @@ -876,15 +877,16 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { | |||
| 876 | buffer.address = config.Address(); | 877 | buffer.address = config.Address(); |
| 877 | buffer.size = config.size; | 878 | buffer.size = config.size; |
| 878 | buffer.enabled = mask[entry.GetIndex()]; | 879 | buffer.enabled = mask[entry.GetIndex()]; |
| 879 | SetupConstBuffer(buffer, entry); | 880 | SetupConstBuffer(binding++, buffer, entry); |
| 880 | } | 881 | } |
| 881 | } | 882 | } |
| 882 | 883 | ||
| 883 | void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, | 884 | void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, |
| 884 | const GLShader::ConstBufferEntry& entry) { | 885 | const GLShader::ConstBufferEntry& entry) { |
| 885 | if (!buffer.enabled) { | 886 | if (!buffer.enabled) { |
| 886 | // Set values to zero to unbind buffers | 887 | // Set values to zero to unbind buffers |
| 887 | bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); | 888 | bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, |
| 889 | sizeof(float)); | ||
| 888 | return; | 890 | return; |
| 889 | } | 891 | } |
| 890 | 892 | ||
| @@ -895,18 +897,20 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b | |||
| 895 | const auto alignment = device.GetUniformBufferAlignment(); | 897 | const auto alignment = device.GetUniformBufferAlignment(); |
| 896 | const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, | 898 | const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, |
| 897 | device.HasFastBufferSubData()); | 899 | device.HasFastBufferSubData()); |
| 898 | bind_ubo_pushbuffer.Push(cbuf, offset, size); | 900 | bind_ubo_pushbuffer.Push(binding, cbuf, offset, size); |
| 899 | } | 901 | } |
| 900 | 902 | ||
| 901 | void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { | 903 | void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { |
| 902 | auto& gpu{system.GPU()}; | 904 | auto& gpu{system.GPU()}; |
| 903 | auto& memory_manager{gpu.MemoryManager()}; | 905 | auto& memory_manager{gpu.MemoryManager()}; |
| 904 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; | 906 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; |
| 907 | |||
| 908 | u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer; | ||
| 905 | for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { | 909 | for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { |
| 906 | const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; | 910 | const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; |
| 907 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; | 911 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; |
| 908 | const auto size{memory_manager.Read<u32>(addr + 8)}; | 912 | const auto size{memory_manager.Read<u32>(addr + 8)}; |
| 909 | SetupGlobalMemory(entry, gpu_addr, size); | 913 | SetupGlobalMemory(binding++, entry, gpu_addr, size); |
| 910 | } | 914 | } |
| 911 | } | 915 | } |
| 912 | 916 | ||
| @@ -914,38 +918,35 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { | |||
| 914 | auto& gpu{system.GPU()}; | 918 | auto& gpu{system.GPU()}; |
| 915 | auto& memory_manager{gpu.MemoryManager()}; | 919 | auto& memory_manager{gpu.MemoryManager()}; |
| 916 | const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; | 920 | const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; |
| 921 | |||
| 922 | u32 binding = 0; | ||
| 917 | for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) { | 923 | for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) { |
| 918 | const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; | 924 | const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; |
| 919 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; | 925 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; |
| 920 | const auto size{memory_manager.Read<u32>(addr + 8)}; | 926 | const auto size{memory_manager.Read<u32>(addr + 8)}; |
| 921 | SetupGlobalMemory(entry, gpu_addr, size); | 927 | SetupGlobalMemory(binding++, entry, gpu_addr, size); |
| 922 | } | 928 | } |
| 923 | } | 929 | } |
| 924 | 930 | ||
| 925 | void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, | 931 | void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, |
| 926 | GPUVAddr gpu_addr, std::size_t size) { | 932 | GPUVAddr gpu_addr, std::size_t size) { |
| 927 | const auto alignment{device.GetShaderStorageBufferAlignment()}; | 933 | const auto alignment{device.GetShaderStorageBufferAlignment()}; |
| 928 | const auto [ssbo, buffer_offset] = | 934 | const auto [ssbo, buffer_offset] = |
| 929 | buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten()); | 935 | buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten()); |
| 930 | bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); | 936 | bind_ssbo_pushbuffer.Push(binding, ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); |
| 931 | } | 937 | } |
| 932 | 938 | ||
| 933 | void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader, | 939 | void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) { |
| 934 | BaseBindings base_bindings) { | ||
| 935 | MICROPROFILE_SCOPE(OpenGL_Texture); | 940 | MICROPROFILE_SCOPE(OpenGL_Texture); |
| 936 | const auto& gpu = system.GPU(); | 941 | const auto& gpu = system.GPU(); |
| 937 | const auto& maxwell3d = gpu.Maxwell3D(); | 942 | const auto& maxwell3d = gpu.Maxwell3D(); |
| 938 | const auto& entries = shader->GetShaderEntries().samplers; | 943 | const auto& entries = shader->GetShaderEntries().samplers; |
| 939 | 944 | ||
| 940 | ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.textures), | 945 | u32 binding = device.GetBaseBindings(stage_index).sampler; |
| 941 | "Exceeded the number of active textures."); | 946 | for (const auto& entry : entries) { |
| 942 | |||
| 943 | const auto num_entries = static_cast<u32>(entries.size()); | ||
| 944 | for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { | ||
| 945 | const auto& entry = entries[bindpoint]; | ||
| 946 | const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); | 947 | const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); |
| 947 | const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); | 948 | const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); |
| 948 | SetupTexture(base_bindings.sampler + bindpoint, texture, entry); | 949 | SetupTexture(binding++, texture, entry); |
| 949 | } | 950 | } |
| 950 | } | 951 | } |
| 951 | 952 | ||
| @@ -954,14 +955,10 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { | |||
| 954 | const auto& compute = system.GPU().KeplerCompute(); | 955 | const auto& compute = system.GPU().KeplerCompute(); |
| 955 | const auto& entries = kernel->GetShaderEntries().samplers; | 956 | const auto& entries = kernel->GetShaderEntries().samplers; |
| 956 | 957 | ||
| 957 | ASSERT_MSG(entries.size() <= std::size(state.textures), | 958 | u32 binding = 0; |
| 958 | "Exceeded the number of active textures."); | 959 | for (const auto& entry : entries) { |
| 959 | |||
| 960 | const auto num_entries = static_cast<u32>(entries.size()); | ||
| 961 | for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { | ||
| 962 | const auto& entry = entries[bindpoint]; | ||
| 963 | const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); | 960 | const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); |
| 964 | SetupTexture(bindpoint, texture, entry); | 961 | SetupTexture(binding++, texture, entry); |
| 965 | } | 962 | } |
| 966 | } | 963 | } |
| 967 | 964 | ||
| @@ -986,8 +983,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu | |||
| 986 | texture.tic.w_source); | 983 | texture.tic.w_source); |
| 987 | } | 984 | } |
| 988 | 985 | ||
| 989 | void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader, | 986 | void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { |
| 990 | BaseBindings base_bindings) { | ||
| 991 | const auto& maxwell3d = system.GPU().Maxwell3D(); | 987 | const auto& maxwell3d = system.GPU().Maxwell3D(); |
| 992 | const auto& entries = shader->GetShaderEntries().images; | 988 | const auto& entries = shader->GetShaderEntries().images; |
| 993 | 989 | ||
| @@ -996,7 +992,7 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh | |||
| 996 | const auto& entry = entries[bindpoint]; | 992 | const auto& entry = entries[bindpoint]; |
| 997 | const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); | 993 | const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); |
| 998 | const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; | 994 | const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; |
| 999 | SetupImage(base_bindings.image + bindpoint, tic, entry); | 995 | SetupImage(bindpoint, tic, entry); |
| 1000 | } | 996 | } |
| 1001 | } | 997 | } |
| 1002 | 998 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 6a2ce1586..0e47d71df 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -89,7 +89,7 @@ private: | |||
| 89 | void SetupComputeConstBuffers(const Shader& kernel); | 89 | void SetupComputeConstBuffers(const Shader& kernel); |
| 90 | 90 | ||
| 91 | /// Configures a constant buffer. | 91 | /// Configures a constant buffer. |
| 92 | void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, | 92 | void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, |
| 93 | const GLShader::ConstBufferEntry& entry); | 93 | const GLShader::ConstBufferEntry& entry); |
| 94 | 94 | ||
| 95 | /// Configures the current global memory entries to use for the draw command. | 95 | /// Configures the current global memory entries to use for the draw command. |
| @@ -99,15 +99,14 @@ private: | |||
| 99 | void SetupComputeGlobalMemory(const Shader& kernel); | 99 | void SetupComputeGlobalMemory(const Shader& kernel); |
| 100 | 100 | ||
| 101 | /// Configures a constant buffer. | 101 | /// Configures a constant buffer. |
| 102 | void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, | 102 | void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, |
| 103 | std::size_t size); | 103 | std::size_t size); |
| 104 | 104 | ||
| 105 | /// Syncs all the state, shaders, render targets and textures setting before a draw call. | 105 | /// Syncs all the state, shaders, render targets and textures setting before a draw call. |
| 106 | void DrawPrelude(); | 106 | void DrawPrelude(); |
| 107 | 107 | ||
| 108 | /// Configures the current textures to use for the draw command. | 108 | /// Configures the current textures to use for the draw command. |
| 109 | void SetupDrawTextures(std::size_t stage_index, const Shader& shader, | 109 | void SetupDrawTextures(std::size_t stage_index, const Shader& shader); |
| 110 | BaseBindings base_bindings); | ||
| 111 | 110 | ||
| 112 | /// Configures the textures used in a compute shader. | 111 | /// Configures the textures used in a compute shader. |
| 113 | void SetupComputeTextures(const Shader& kernel); | 112 | void SetupComputeTextures(const Shader& kernel); |
| @@ -117,7 +116,7 @@ private: | |||
| 117 | const GLShader::SamplerEntry& entry); | 116 | const GLShader::SamplerEntry& entry); |
| 118 | 117 | ||
| 119 | /// Configures images in a graphics shader. | 118 | /// Configures images in a graphics shader. |
| 120 | void SetupDrawImages(std::size_t stage_index, const Shader& shader, BaseBindings base_bindings); | 119 | void SetupDrawImages(std::size_t stage_index, const Shader& shader); |
| 121 | 120 | ||
| 122 | /// Configures images in a compute shader. | 121 | /// Configures images in a compute shader. |
| 123 | void SetupComputeImages(const Shader& shader); | 122 | void SetupComputeImages(const Shader& shader); |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index f474fb550..41ca005a1 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -266,28 +266,6 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp | |||
| 266 | } | 266 | } |
| 267 | source += '\n'; | 267 | source += '\n'; |
| 268 | 268 | ||
| 269 | auto base_bindings = variant.base_bindings; | ||
| 270 | if (!is_compute) { | ||
| 271 | source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | ||
| 272 | } | ||
| 273 | |||
| 274 | for (const auto& cbuf : entries.const_buffers) { | ||
| 275 | source += | ||
| 276 | fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++); | ||
| 277 | } | ||
| 278 | for (const auto& gmem : entries.global_memory_entries) { | ||
| 279 | source += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(), | ||
| 280 | gmem.GetCbufOffset(), base_bindings.gmem++); | ||
| 281 | } | ||
| 282 | for (const auto& sampler : entries.samplers) { | ||
| 283 | source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(), | ||
| 284 | base_bindings.sampler++); | ||
| 285 | } | ||
| 286 | for (const auto& image : entries.images) { | ||
| 287 | source += | ||
| 288 | fmt::format("#define IMAGE_BINDING_{} {}\n", image.GetIndex(), base_bindings.image++); | ||
| 289 | } | ||
| 290 | |||
| 291 | if (shader_type == ShaderType::Geometry) { | 269 | if (shader_type == ShaderType::Geometry) { |
| 292 | const auto [glsl_topology, debug_name, max_vertices] = | 270 | const auto [glsl_topology, debug_name, max_vertices] = |
| 293 | GetPrimitiveDescription(variant.primitive_mode); | 271 | GetPrimitiveDescription(variant.primitive_mode); |
| @@ -403,27 +381,21 @@ Shader CachedShader::CreateFromCache(const ShaderParameters& params, | |||
| 403 | unspecialized.code_b)); | 381 | unspecialized.code_b)); |
| 404 | } | 382 | } |
| 405 | 383 | ||
| 406 | std::tuple<GLuint, BaseBindings> CachedShader::GetHandle(const ProgramVariant& variant) { | 384 | GLuint CachedShader::GetHandle(const ProgramVariant& variant) { |
| 407 | EnsureValidLockerVariant(); | 385 | EnsureValidLockerVariant(); |
| 408 | 386 | ||
| 409 | const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant); | 387 | const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant); |
| 410 | auto& program = entry->second; | 388 | auto& program = entry->second; |
| 411 | if (is_cache_miss) { | 389 | if (!is_cache_miss) { |
| 412 | program = BuildShader(device, unique_identifier, shader_type, code, code_b, | 390 | return program->handle; |
| 413 | *curr_locker_variant->locker, variant); | ||
| 414 | disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker)); | ||
| 415 | |||
| 416 | LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); | ||
| 417 | } | 391 | } |
| 418 | 392 | ||
| 419 | auto base_bindings = variant.base_bindings; | 393 | program = BuildShader(device, unique_identifier, shader_type, code, code_b, |
| 420 | base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()); | 394 | *curr_locker_variant->locker, variant); |
| 421 | base_bindings.cbuf += STAGE_RESERVED_UBOS; | 395 | disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker)); |
| 422 | base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); | ||
| 423 | base_bindings.sampler += static_cast<u32>(entries.samplers.size()); | ||
| 424 | base_bindings.image += static_cast<u32>(entries.images.size()); | ||
| 425 | 396 | ||
| 426 | return {program->handle, base_bindings}; | 397 | LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); |
| 398 | return program->handle; | ||
| 427 | } | 399 | } |
| 428 | 400 | ||
| 429 | bool CachedShader::EnsureValidLockerVariant() { | 401 | bool CachedShader::EnsureValidLockerVariant() { |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index d23c8d6d4..7b1470db3 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -87,7 +87,7 @@ public: | |||
| 87 | } | 87 | } |
| 88 | 88 | ||
| 89 | /// Gets the GL program handle for the shader | 89 | /// Gets the GL program handle for the shader |
| 90 | std::tuple<GLuint, BaseBindings> GetHandle(const ProgramVariant& variant); | 90 | GLuint GetHandle(const ProgramVariant& variant); |
| 91 | 91 | ||
| 92 | private: | 92 | private: |
| 93 | struct LockerVariant { | 93 | struct LockerVariant { |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index caec565d1..5ad285c25 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -43,6 +43,9 @@ using namespace VideoCommon::Shader; | |||
| 43 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 43 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 44 | using Operation = const OperationNode&; | 44 | using Operation = const OperationNode&; |
| 45 | 45 | ||
| 46 | class ASTDecompiler; | ||
| 47 | class ExprDecompiler; | ||
| 48 | |||
| 46 | enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; | 49 | enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; |
| 47 | 50 | ||
| 48 | struct TextureAoffi {}; | 51 | struct TextureAoffi {}; |
| @@ -337,9 +340,6 @@ std::string FlowStackTopName(MetaStackClass stack) { | |||
| 337 | return stage == ShaderType::Vertex; | 340 | return stage == ShaderType::Vertex; |
| 338 | } | 341 | } |
| 339 | 342 | ||
| 340 | class ASTDecompiler; | ||
| 341 | class ExprDecompiler; | ||
| 342 | |||
| 343 | class GLSLDecompiler final { | 343 | class GLSLDecompiler final { |
| 344 | public: | 344 | public: |
| 345 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage, | 345 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage, |
| @@ -621,7 +621,8 @@ private: | |||
| 621 | void DeclareConstantBuffers() { | 621 | void DeclareConstantBuffers() { |
| 622 | for (const auto& entry : ir.GetConstantBuffers()) { | 622 | for (const auto& entry : ir.GetConstantBuffers()) { |
| 623 | const auto [index, size] = entry; | 623 | const auto [index, size] = entry; |
| 624 | code.AddLine("layout (std140, binding = CBUF_BINDING_{}) uniform {} {{", index, | 624 | const u32 binding = device.GetBaseBindings(stage).uniform_buffer + index; |
| 625 | code.AddLine("layout (std140, binding = {}) uniform {} {{", binding, | ||
| 625 | GetConstBufferBlock(index)); | 626 | GetConstBufferBlock(index)); |
| 626 | code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS); | 627 | code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS); |
| 627 | code.AddLine("}};"); | 628 | code.AddLine("}};"); |
| @@ -630,6 +631,8 @@ private: | |||
| 630 | } | 631 | } |
| 631 | 632 | ||
| 632 | void DeclareGlobalMemory() { | 633 | void DeclareGlobalMemory() { |
| 634 | u32 binding = device.GetBaseBindings(stage).shader_storage_buffer; | ||
| 635 | |||
| 633 | for (const auto& gmem : ir.GetGlobalMemory()) { | 636 | for (const auto& gmem : ir.GetGlobalMemory()) { |
| 634 | const auto& [base, usage] = gmem; | 637 | const auto& [base, usage] = gmem; |
| 635 | 638 | ||
| @@ -642,8 +645,8 @@ private: | |||
| 642 | qualifier += " writeonly"; | 645 | qualifier += " writeonly"; |
| 643 | } | 646 | } |
| 644 | 647 | ||
| 645 | code.AddLine("layout (std430, binding = GMEM_BINDING_{}_{}) {} buffer {} {{", | 648 | code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier, |
| 646 | base.cbuf_index, base.cbuf_offset, qualifier, GetGlobalMemoryBlock(base)); | 649 | GetGlobalMemoryBlock(base)); |
| 647 | code.AddLine(" uint {}[];", GetGlobalMemory(base)); | 650 | code.AddLine(" uint {}[];", GetGlobalMemory(base)); |
| 648 | code.AddLine("}};"); | 651 | code.AddLine("}};"); |
| 649 | code.AddNewLine(); | 652 | code.AddNewLine(); |
| @@ -653,9 +656,11 @@ private: | |||
| 653 | void DeclareSamplers() { | 656 | void DeclareSamplers() { |
| 654 | const auto& samplers = ir.GetSamplers(); | 657 | const auto& samplers = ir.GetSamplers(); |
| 655 | for (const auto& sampler : samplers) { | 658 | for (const auto& sampler : samplers) { |
| 656 | const std::string name{GetSampler(sampler)}; | 659 | const std::string name = GetSampler(sampler); |
| 657 | const std::string description{"layout (binding = SAMPLER_BINDING_" + | 660 | |
| 658 | std::to_string(sampler.GetIndex()) + ") uniform"}; | 661 | const u32 binding = device.GetBaseBindings(stage).sampler + sampler.GetIndex(); |
| 662 | const std::string description = fmt::format("layout (binding = {}) uniform", binding); | ||
| 663 | |||
| 659 | std::string sampler_type = [&]() { | 664 | std::string sampler_type = [&]() { |
| 660 | if (sampler.IsBuffer()) { | 665 | if (sampler.IsBuffer()) { |
| 661 | return "samplerBuffer"; | 666 | return "samplerBuffer"; |
| @@ -732,10 +737,12 @@ private: | |||
| 732 | qualifier += " writeonly"; | 737 | qualifier += " writeonly"; |
| 733 | } | 738 | } |
| 734 | 739 | ||
| 740 | const u32 binding = device.GetBaseBindings(stage).image + image.GetIndex(); | ||
| 741 | |||
| 735 | const char* format = image.IsAtomic() ? "r32ui, " : ""; | 742 | const char* format = image.IsAtomic() ? "r32ui, " : ""; |
| 736 | const char* type_declaration = GetImageTypeDeclaration(image.GetType()); | 743 | const char* type_declaration = GetImageTypeDeclaration(image.GetType()); |
| 737 | code.AddLine("layout ({}binding = IMAGE_BINDING_{}) {} uniform uimage{} {};", format, | 744 | code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding, |
| 738 | image.GetIndex(), qualifier, type_declaration, GetImage(image)); | 745 | qualifier, type_declaration, GetImage(image)); |
| 739 | } | 746 | } |
| 740 | if (!images.empty()) { | 747 | if (!images.empty()) { |
| 741 | code.AddNewLine(); | 748 | code.AddNewLine(); |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 09f62c8c4..cf874a09a 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -53,11 +53,10 @@ struct BindlessSamplerKey { | |||
| 53 | Tegra::Engines::SamplerDescriptor sampler{}; | 53 | Tegra::Engines::SamplerDescriptor sampler{}; |
| 54 | }; | 54 | }; |
| 55 | 55 | ||
| 56 | constexpr u32 NativeVersion = 10; | 56 | constexpr u32 NativeVersion = 11; |
| 57 | 57 | ||
| 58 | // Making sure sizes doesn't change by accident | 58 | // Making sure sizes doesn't change by accident |
| 59 | static_assert(sizeof(BaseBindings) == 16); | 59 | static_assert(sizeof(ProgramVariant) == 20); |
| 60 | static_assert(sizeof(ProgramVariant) == 36); | ||
| 61 | 60 | ||
| 62 | ShaderCacheVersionHash GetShaderCacheVersionHash() { | 61 | ShaderCacheVersionHash GetShaderCacheVersionHash() { |
| 63 | ShaderCacheVersionHash hash{}; | 62 | ShaderCacheVersionHash hash{}; |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 917dbccdd..69a2fbdda 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h | |||
| @@ -38,31 +38,13 @@ struct ShaderDiskCacheDump; | |||
| 38 | using ProgramCode = std::vector<u64>; | 38 | using ProgramCode = std::vector<u64>; |
| 39 | using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; | 39 | using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; |
| 40 | 40 | ||
| 41 | /// Allocated bindings used by an OpenGL shader program | ||
| 42 | struct BaseBindings { | ||
| 43 | u32 cbuf{}; | ||
| 44 | u32 gmem{}; | ||
| 45 | u32 sampler{}; | ||
| 46 | u32 image{}; | ||
| 47 | |||
| 48 | bool operator==(const BaseBindings& rhs) const noexcept { | ||
| 49 | return std::tie(cbuf, gmem, sampler, image) == | ||
| 50 | std::tie(rhs.cbuf, rhs.gmem, rhs.sampler, rhs.image); | ||
| 51 | } | ||
| 52 | |||
| 53 | bool operator!=(const BaseBindings& rhs) const noexcept { | ||
| 54 | return !operator==(rhs); | ||
| 55 | } | ||
| 56 | }; | ||
| 57 | static_assert(std::is_trivially_copyable_v<BaseBindings>); | ||
| 58 | |||
| 59 | /// Describes the different variants a program can be compiled with. | 41 | /// Describes the different variants a program can be compiled with. |
| 60 | struct ProgramVariant final { | 42 | struct ProgramVariant final { |
| 61 | ProgramVariant() = default; | 43 | ProgramVariant() = default; |
| 62 | 44 | ||
| 63 | /// Graphics constructor. | 45 | /// Graphics constructor. |
| 64 | explicit constexpr ProgramVariant(BaseBindings base_bindings, GLenum primitive_mode) noexcept | 46 | explicit constexpr ProgramVariant(GLenum primitive_mode) noexcept |
| 65 | : base_bindings{base_bindings}, primitive_mode{primitive_mode} {} | 47 | : primitive_mode{primitive_mode} {} |
| 66 | 48 | ||
| 67 | /// Compute constructor. | 49 | /// Compute constructor. |
| 68 | explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size, | 50 | explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size, |
| @@ -71,7 +53,6 @@ struct ProgramVariant final { | |||
| 71 | shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {} | 53 | shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {} |
| 72 | 54 | ||
| 73 | // Graphics specific parameters. | 55 | // Graphics specific parameters. |
| 74 | BaseBindings base_bindings{}; | ||
| 75 | GLenum primitive_mode{}; | 56 | GLenum primitive_mode{}; |
| 76 | 57 | ||
| 77 | // Compute specific parameters. | 58 | // Compute specific parameters. |
| @@ -82,10 +63,10 @@ struct ProgramVariant final { | |||
| 82 | u32 local_memory_size{}; | 63 | u32 local_memory_size{}; |
| 83 | 64 | ||
| 84 | bool operator==(const ProgramVariant& rhs) const noexcept { | 65 | bool operator==(const ProgramVariant& rhs) const noexcept { |
| 85 | return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z, | 66 | return std::tie(primitive_mode, block_x, block_y, block_z, shared_memory_size, |
| 86 | shared_memory_size, local_memory_size) == | 67 | local_memory_size) == std::tie(rhs.primitive_mode, rhs.block_x, rhs.block_y, |
| 87 | std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y, | 68 | rhs.block_z, rhs.shared_memory_size, |
| 88 | rhs.block_z, rhs.shared_memory_size, rhs.local_memory_size); | 69 | rhs.local_memory_size); |
| 89 | } | 70 | } |
| 90 | 71 | ||
| 91 | bool operator!=(const ProgramVariant& rhs) const noexcept { | 72 | bool operator!=(const ProgramVariant& rhs) const noexcept { |
| @@ -118,20 +99,9 @@ struct ShaderDiskCacheUsage { | |||
| 118 | namespace std { | 99 | namespace std { |
| 119 | 100 | ||
| 120 | template <> | 101 | template <> |
| 121 | struct hash<OpenGL::BaseBindings> { | ||
| 122 | std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept { | ||
| 123 | return static_cast<std::size_t>(bindings.cbuf) ^ | ||
| 124 | (static_cast<std::size_t>(bindings.gmem) << 8) ^ | ||
| 125 | (static_cast<std::size_t>(bindings.sampler) << 16) ^ | ||
| 126 | (static_cast<std::size_t>(bindings.image) << 24); | ||
| 127 | } | ||
| 128 | }; | ||
| 129 | |||
| 130 | template <> | ||
| 131 | struct hash<OpenGL::ProgramVariant> { | 102 | struct hash<OpenGL::ProgramVariant> { |
| 132 | std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept { | 103 | std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept { |
| 133 | return std::hash<OpenGL::BaseBindings>{}(variant.base_bindings) ^ | 104 | return (static_cast<std::size_t>(variant.primitive_mode) << 6) ^ |
| 134 | (static_cast<std::size_t>(variant.primitive_mode) << 6) ^ | ||
| 135 | static_cast<std::size_t>(variant.block_x) ^ | 105 | static_cast<std::size_t>(variant.block_x) ^ |
| 136 | (static_cast<std::size_t>(variant.block_y) << 32) ^ | 106 | (static_cast<std::size_t>(variant.block_y) << 32) ^ |
| 137 | (static_cast<std::size_t>(variant.block_z) << 48) ^ | 107 | (static_cast<std::size_t>(variant.block_z) << 48) ^ |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 2f601d550..296817efc 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -2,9 +2,13 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <string> | ||
| 6 | |||
| 5 | #include <fmt/format.h> | 7 | #include <fmt/format.h> |
| 8 | |||
| 6 | #include "video_core/engines/maxwell_3d.h" | 9 | #include "video_core/engines/maxwell_3d.h" |
| 7 | #include "video_core/engines/shader_type.h" | 10 | #include "video_core/engines/shader_type.h" |
| 11 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 8 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 12 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 9 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 13 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| 10 | #include "video_core/shader/shader_ir.h" | 14 | #include "video_core/shader/shader_ir.h" |
| @@ -20,12 +24,13 @@ using VideoCommon::Shader::ShaderIR; | |||
| 20 | 24 | ||
| 21 | std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) { | 25 | std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) { |
| 22 | std::string out = GetCommonDeclarations(); | 26 | std::string out = GetCommonDeclarations(); |
| 23 | out += R"( | 27 | out += fmt::format(R"( |
| 24 | layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { | 28 | layout (std140, binding = {}) uniform vs_config {{ |
| 25 | float y_direction; | 29 | float y_direction; |
| 26 | }; | 30 | }}; |
| 27 | 31 | ||
| 28 | )"; | 32 | )", |
| 33 | EmulationUniformBlockBinding); | ||
| 29 | out += Decompile(device, ir, ShaderType::Vertex, "vertex"); | 34 | out += Decompile(device, ir, ShaderType::Vertex, "vertex"); |
| 30 | if (ir_b) { | 35 | if (ir_b) { |
| 31 | out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b"); | 36 | out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b"); |
| @@ -44,12 +49,13 @@ void main() { | |||
| 44 | 49 | ||
| 45 | std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) { | 50 | std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) { |
| 46 | std::string out = GetCommonDeclarations(); | 51 | std::string out = GetCommonDeclarations(); |
| 47 | out += R"( | 52 | out += fmt::format(R"( |
| 48 | layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { | 53 | layout (std140, binding = {}) uniform gs_config {{ |
| 49 | float y_direction; | 54 | float y_direction; |
| 50 | }; | 55 | }}; |
| 51 | 56 | ||
| 52 | )"; | 57 | )", |
| 58 | EmulationUniformBlockBinding); | ||
| 53 | out += Decompile(device, ir, ShaderType::Geometry, "geometry"); | 59 | out += Decompile(device, ir, ShaderType::Geometry, "geometry"); |
| 54 | 60 | ||
| 55 | out += R"( | 61 | out += R"( |
| @@ -62,7 +68,7 @@ void main() { | |||
| 62 | 68 | ||
| 63 | std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) { | 69 | std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) { |
| 64 | std::string out = GetCommonDeclarations(); | 70 | std::string out = GetCommonDeclarations(); |
| 65 | out += R"( | 71 | out += fmt::format(R"( |
| 66 | layout (location = 0) out vec4 FragColor0; | 72 | layout (location = 0) out vec4 FragColor0; |
| 67 | layout (location = 1) out vec4 FragColor1; | 73 | layout (location = 1) out vec4 FragColor1; |
| 68 | layout (location = 2) out vec4 FragColor2; | 74 | layout (location = 2) out vec4 FragColor2; |
| @@ -72,11 +78,12 @@ layout (location = 5) out vec4 FragColor5; | |||
| 72 | layout (location = 6) out vec4 FragColor6; | 78 | layout (location = 6) out vec4 FragColor6; |
| 73 | layout (location = 7) out vec4 FragColor7; | 79 | layout (location = 7) out vec4 FragColor7; |
| 74 | 80 | ||
| 75 | layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { | 81 | layout (std140, binding = {}) uniform fs_config {{ |
| 76 | float y_direction; | 82 | float y_direction; |
| 77 | }; | 83 | }}; |
| 78 | 84 | ||
| 79 | )"; | 85 | )", |
| 86 | EmulationUniformBlockBinding); | ||
| 80 | out += Decompile(device, ir, ShaderType::Fragment, "fragment"); | 87 | out += Decompile(device, ir, ShaderType::Fragment, "fragment"); |
| 81 | 88 | ||
| 82 | out += R"( | 89 | out += R"( |
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index ccbe5912e..4cf3d0a8a 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -417,14 +417,20 @@ void OpenGLState::ApplyClipControl() { | |||
| 417 | } | 417 | } |
| 418 | 418 | ||
| 419 | void OpenGLState::ApplyTextures() { | 419 | void OpenGLState::ApplyTextures() { |
| 420 | if (const auto update = UpdateArray(cur_state.textures, textures)) { | 420 | const std::size_t size = std::size(textures); |
| 421 | glBindTextures(update->first, update->second, textures.data() + update->first); | 421 | for (std::size_t i = 0; i < size; ++i) { |
| 422 | if (UpdateValue(cur_state.textures[i], textures[i])) { | ||
| 423 | glBindTextureUnit(static_cast<GLuint>(i), textures[i]); | ||
| 424 | } | ||
| 422 | } | 425 | } |
| 423 | } | 426 | } |
| 424 | 427 | ||
| 425 | void OpenGLState::ApplySamplers() { | 428 | void OpenGLState::ApplySamplers() { |
| 426 | if (const auto update = UpdateArray(cur_state.samplers, samplers)) { | 429 | const std::size_t size = std::size(samplers); |
| 427 | glBindSamplers(update->first, update->second, samplers.data() + update->first); | 430 | for (std::size_t i = 0; i < size; ++i) { |
| 431 | if (UpdateValue(cur_state.samplers[i], samplers[i])) { | ||
| 432 | glBindSampler(static_cast<GLuint>(i), samplers[i]); | ||
| 433 | } | ||
| 428 | } | 434 | } |
| 429 | } | 435 | } |
| 430 | 436 | ||
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index eaff22bda..fd53eb81a 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h | |||
| @@ -96,8 +96,9 @@ public: | |||
| 96 | GLenum operation = GL_COPY; | 96 | GLenum operation = GL_COPY; |
| 97 | } logic_op; | 97 | } logic_op; |
| 98 | 98 | ||
| 99 | std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures = {}; | 99 | static constexpr std::size_t NumSamplers = 32 * 5; |
| 100 | std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers = {}; | 100 | std::array<GLuint, NumSamplers> textures = {}; |
| 101 | std::array<GLuint, NumSamplers> samplers = {}; | ||
| 101 | std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images = {}; | 102 | std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images = {}; |
| 102 | 103 | ||
| 103 | struct { | 104 | struct { |
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index c504a2c1a..9770dda1c 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp | |||
| @@ -3,7 +3,10 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <string> | 5 | #include <string> |
| 6 | #include <vector> | ||
| 7 | |||
| 6 | #include <fmt/format.h> | 8 | #include <fmt/format.h> |
| 9 | |||
| 7 | #include <glad/glad.h> | 10 | #include <glad/glad.h> |
| 8 | 11 | ||
| 9 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| @@ -48,34 +51,19 @@ BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{t | |||
| 48 | 51 | ||
| 49 | BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; | 52 | BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; |
| 50 | 53 | ||
| 51 | void BindBuffersRangePushBuffer::Setup(GLuint first_) { | 54 | void BindBuffersRangePushBuffer::Setup() { |
| 52 | first = first_; | 55 | entries.clear(); |
| 53 | buffer_pointers.clear(); | ||
| 54 | offsets.clear(); | ||
| 55 | sizes.clear(); | ||
| 56 | } | 56 | } |
| 57 | 57 | ||
| 58 | void BindBuffersRangePushBuffer::Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) { | 58 | void BindBuffersRangePushBuffer::Push(GLuint binding, const GLuint* buffer, GLintptr offset, |
| 59 | buffer_pointers.push_back(buffer); | 59 | GLsizeiptr size) { |
| 60 | offsets.push_back(offset); | 60 | entries.push_back(Entry{binding, buffer, offset, size}); |
| 61 | sizes.push_back(size); | ||
| 62 | } | 61 | } |
| 63 | 62 | ||
| 64 | void BindBuffersRangePushBuffer::Bind() { | 63 | void BindBuffersRangePushBuffer::Bind() { |
| 65 | // Ensure sizes are valid. | 64 | for (const Entry& entry : entries) { |
| 66 | const std::size_t count{buffer_pointers.size()}; | 65 | glBindBufferRange(target, entry.binding, *entry.buffer, entry.offset, entry.size); |
| 67 | DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); | ||
| 68 | if (count == 0) { | ||
| 69 | return; | ||
| 70 | } | 66 | } |
| 71 | |||
| 72 | // Dereference buffers. | ||
| 73 | buffers.resize(count); | ||
| 74 | std::transform(buffer_pointers.begin(), buffer_pointers.end(), buffers.begin(), | ||
| 75 | [](const GLuint* pointer) { return *pointer; }); | ||
| 76 | |||
| 77 | glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(), | ||
| 78 | sizes.data()); | ||
| 79 | } | 67 | } |
| 80 | 68 | ||
| 81 | void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { | 69 | void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { |
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index 6c2b45546..d56153fe7 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h | |||
| @@ -43,20 +43,22 @@ public: | |||
| 43 | explicit BindBuffersRangePushBuffer(GLenum target); | 43 | explicit BindBuffersRangePushBuffer(GLenum target); |
| 44 | ~BindBuffersRangePushBuffer(); | 44 | ~BindBuffersRangePushBuffer(); |
| 45 | 45 | ||
| 46 | void Setup(GLuint first_); | 46 | void Setup(); |
| 47 | 47 | ||
| 48 | void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size); | 48 | void Push(GLuint binding, const GLuint* buffer, GLintptr offset, GLsizeiptr size); |
| 49 | 49 | ||
| 50 | void Bind(); | 50 | void Bind(); |
| 51 | 51 | ||
| 52 | private: | 52 | private: |
| 53 | GLenum target{}; | 53 | struct Entry { |
| 54 | GLuint first{}; | 54 | GLuint binding; |
| 55 | std::vector<const GLuint*> buffer_pointers; | 55 | const GLuint* buffer; |
| 56 | GLintptr offset; | ||
| 57 | GLsizeiptr size; | ||
| 58 | }; | ||
| 56 | 59 | ||
| 57 | std::vector<GLuint> buffers; | 60 | GLenum target; |
| 58 | std::vector<GLintptr> offsets; | 61 | std::vector<Entry> entries; |
| 59 | std::vector<GLsizeiptr> sizes; | ||
| 60 | }; | 62 | }; |
| 61 | 63 | ||
| 62 | void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); | 64 | void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); |