diff options
| author | 2018-06-09 18:02:05 -0500 | |
|---|---|---|
| committer | 2018-06-09 18:02:05 -0500 | |
| commit | 2a7653142dd8adeacafd8c90e64d52d6959f0aa7 (patch) | |
| tree | 45507c03f0a4bfa0fb0dc9736fccd448bb9ea0df | |
| parent | Merge pull request #550 from Subv/ssy (diff) | |
| download | yuzu-2a7653142dd8adeacafd8c90e64d52d6959f0aa7.tar.gz yuzu-2a7653142dd8adeacafd8c90e64d52d6959f0aa7.tar.xz yuzu-2a7653142dd8adeacafd8c90e64d52d6959f0aa7.zip | |
Rasterizer: Use UBOs instead of SSBOs for uploading const buffers.
This should help a bit with GPU performance once we're GPU-bound.
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 28 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 22 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 2 |
4 files changed, 39 insertions, 18 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 6f05f24a0..e04966849 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -197,8 +197,8 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { | |||
| 197 | ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!"); | 197 | ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!"); |
| 198 | 198 | ||
| 199 | // Next available bindpoints to use when uploading the const buffers and textures to the GLSL | 199 | // Next available bindpoints to use when uploading the const buffers and textures to the GLSL |
| 200 | // shaders. | 200 | // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. |
| 201 | u32 current_constbuffer_bindpoint = 0; | 201 | u32 current_constbuffer_bindpoint = uniform_buffers.size(); |
| 202 | u32 current_texture_bindpoint = 0; | 202 | u32 current_texture_bindpoint = 0; |
| 203 | 203 | ||
| 204 | for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { | 204 | for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { |
| @@ -608,27 +608,33 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr | |||
| 608 | 608 | ||
| 609 | boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address); | 609 | boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address); |
| 610 | 610 | ||
| 611 | std::vector<u8> data; | 611 | size_t size = 0; |
| 612 | |||
| 612 | if (used_buffer.IsIndirect()) { | 613 | if (used_buffer.IsIndirect()) { |
| 613 | // Buffer is accessed indirectly, so upload the entire thing | 614 | // Buffer is accessed indirectly, so upload the entire thing |
| 614 | data.resize(buffer.size * sizeof(float)); | 615 | size = buffer.size * sizeof(float); |
| 615 | } else { | 616 | } else { |
| 616 | // Buffer is accessed directly, upload just what we use | 617 | // Buffer is accessed directly, upload just what we use |
| 617 | data.resize(used_buffer.GetSize() * sizeof(float)); | 618 | size = used_buffer.GetSize() * sizeof(float); |
| 618 | } | 619 | } |
| 619 | 620 | ||
| 621 | // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 | ||
| 622 | // UBO alignment requirements. | ||
| 623 | size = Common::AlignUp(size, sizeof(GLvec4)); | ||
| 624 | ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); | ||
| 625 | |||
| 626 | std::vector<u8> data(size); | ||
| 620 | Memory::ReadBlock(*addr, data.data(), data.size()); | 627 | Memory::ReadBlock(*addr, data.data(), data.size()); |
| 621 | 628 | ||
| 622 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo); | 629 | glBindBuffer(GL_UNIFORM_BUFFER, buffer_draw_state.ssbo); |
| 623 | glBufferData(GL_SHADER_STORAGE_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW); | 630 | glBufferData(GL_UNIFORM_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW); |
| 624 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); | 631 | glBindBuffer(GL_UNIFORM_BUFFER, 0); |
| 625 | 632 | ||
| 626 | // Now configure the bindpoint of the buffer inside the shader | 633 | // Now configure the bindpoint of the buffer inside the shader |
| 627 | std::string buffer_name = used_buffer.GetName(); | 634 | std::string buffer_name = used_buffer.GetName(); |
| 628 | GLuint index = | 635 | GLuint index = glGetProgramResourceIndex(program, GL_UNIFORM_BLOCK, buffer_name.c_str()); |
| 629 | glGetProgramResourceIndex(program, GL_SHADER_STORAGE_BLOCK, buffer_name.c_str()); | ||
| 630 | if (index != -1) | 636 | if (index != -1) |
| 631 | glShaderStorageBlockBinding(program, index, buffer_draw_state.bindpoint); | 637 | glUniformBlockBinding(program, index, buffer_draw_state.bindpoint); |
| 632 | } | 638 | } |
| 633 | 639 | ||
| 634 | state.Apply(); | 640 | state.Apply(); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index b7c8cf843..2ab066681 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -54,6 +54,11 @@ public: | |||
| 54 | OGLShader shader; | 54 | OGLShader shader; |
| 55 | }; | 55 | }; |
| 56 | 56 | ||
| 57 | /// Maximum supported size that a constbuffer can have in bytes. | ||
| 58 | static constexpr size_t MaxConstbufferSize = 0x1000; | ||
| 59 | static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, | ||
| 60 | "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); | ||
| 61 | |||
| 57 | private: | 62 | private: |
| 58 | class SamplerInfo { | 63 | class SamplerInfo { |
| 59 | public: | 64 | public: |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 37fbb94da..87ae47ac9 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/engines/shader_bytecode.h" | 11 | #include "video_core/engines/shader_bytecode.h" |
| 12 | #include "video_core/renderer_opengl/gl_rasterizer.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 13 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 13 | 14 | ||
| 14 | namespace GLShader { | 15 | namespace GLShader { |
| @@ -366,7 +367,8 @@ public: | |||
| 366 | /// Generates code representing a uniform (C buffer) register, interpreted as the input type. | 367 | /// Generates code representing a uniform (C buffer) register, interpreted as the input type. |
| 367 | std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type) { | 368 | std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type) { |
| 368 | declr_const_buffers[index].MarkAsUsed(index, offset, stage); | 369 | declr_const_buffers[index].MarkAsUsed(index, offset, stage); |
| 369 | std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset) + ']'; | 370 | std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset / 4) + "][" + |
| 371 | std::to_string(offset % 4) + ']'; | ||
| 370 | 372 | ||
| 371 | if (type == GLSLRegister::Type::Float) { | 373 | if (type == GLSLRegister::Type::Float) { |
| 372 | return value; | 374 | return value; |
| @@ -380,8 +382,12 @@ public: | |||
| 380 | std::string GetUniformIndirect(u64 index, s64 offset, const Register& index_reg, | 382 | std::string GetUniformIndirect(u64 index, s64 offset, const Register& index_reg, |
| 381 | GLSLRegister::Type type) { | 383 | GLSLRegister::Type type) { |
| 382 | declr_const_buffers[index].MarkAsUsedIndirect(index, stage); | 384 | declr_const_buffers[index].MarkAsUsedIndirect(index, stage); |
| 383 | std::string value = 'c' + std::to_string(index) + "[(floatBitsToInt(" + | 385 | |
| 384 | GetRegister(index_reg, 0) + ") + " + std::to_string(offset) + ") / 4]"; | 386 | std::string final_offset = "((floatBitsToInt(" + GetRegister(index_reg, 0) + ") + " + |
| 387 | std::to_string(offset) + ") / 4)"; | ||
| 388 | |||
| 389 | std::string value = | ||
| 390 | 'c' + std::to_string(index) + '[' + final_offset + " / 4][" + final_offset + " % 4]"; | ||
| 385 | 391 | ||
| 386 | if (type == GLSLRegister::Type::Float) { | 392 | if (type == GLSLRegister::Type::Float) { |
| 387 | return value; | 393 | return value; |
| @@ -423,9 +429,10 @@ public: | |||
| 423 | 429 | ||
| 424 | unsigned const_buffer_layout = 0; | 430 | unsigned const_buffer_layout = 0; |
| 425 | for (const auto& entry : GetConstBuffersDeclarations()) { | 431 | for (const auto& entry : GetConstBuffersDeclarations()) { |
| 426 | declarations.AddLine("layout(std430) buffer " + entry.GetName()); | 432 | declarations.AddLine("layout(std140) uniform " + entry.GetName()); |
| 427 | declarations.AddLine('{'); | 433 | declarations.AddLine('{'); |
| 428 | declarations.AddLine(" float c" + std::to_string(entry.GetIndex()) + "[];"); | 434 | declarations.AddLine(" vec4 c" + std::to_string(entry.GetIndex()) + |
| 435 | "[MAX_CONSTBUFFER_ELEMENTS];"); | ||
| 429 | declarations.AddLine("};"); | 436 | declarations.AddLine("};"); |
| 430 | declarations.AddNewLine(); | 437 | declarations.AddNewLine(); |
| 431 | ++const_buffer_layout; | 438 | ++const_buffer_layout; |
| @@ -1611,7 +1618,10 @@ private: | |||
| 1611 | }; // namespace Decompiler | 1618 | }; // namespace Decompiler |
| 1612 | 1619 | ||
| 1613 | std::string GetCommonDeclarations() { | 1620 | std::string GetCommonDeclarations() { |
| 1614 | return "bool exec_shader();"; | 1621 | std::string declarations = "bool exec_shader();\n"; |
| 1622 | declarations += "#define MAX_CONSTBUFFER_ELEMENTS " + | ||
| 1623 | std::to_string(RasterizerOpenGL::MaxConstbufferSize / (sizeof(GLvec4))); | ||
| 1624 | return declarations; | ||
| 1615 | } | 1625 | } |
| 1616 | 1626 | ||
| 1617 | boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, | 1627 | boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, |
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 44f0c8a01..443ce3f2b 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -223,7 +223,7 @@ void OpenGLState::Apply() const { | |||
| 223 | if (current.enabled != new_state.enabled || current.bindpoint != new_state.bindpoint || | 223 | if (current.enabled != new_state.enabled || current.bindpoint != new_state.bindpoint || |
| 224 | current.ssbo != new_state.ssbo) { | 224 | current.ssbo != new_state.ssbo) { |
| 225 | if (new_state.enabled) { | 225 | if (new_state.enabled) { |
| 226 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, new_state.bindpoint, new_state.ssbo); | 226 | glBindBufferBase(GL_UNIFORM_BUFFER, new_state.bindpoint, new_state.ssbo); |
| 227 | } | 227 | } |
| 228 | } | 228 | } |
| 229 | } | 229 | } |