diff options
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 121 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 6 |
3 files changed, 89 insertions, 43 deletions
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index d4fcedace..609504795 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -500,6 +500,11 @@ public: | |||
| 500 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(start_high) << 32) | | 500 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(start_high) << 32) | |
| 501 | start_low); | 501 | start_low); |
| 502 | } | 502 | } |
| 503 | |||
| 504 | bool IsEnabled() const { | ||
| 505 | return enable != 0 && StartAddress() != 0; | ||
| 506 | } | ||
| 507 | |||
| 503 | } vertex_array[NumVertexArrays]; | 508 | } vertex_array[NumVertexArrays]; |
| 504 | 509 | ||
| 505 | Blend blend; | 510 | Blend blend; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 2d4a0d6db..82001e7b4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -127,7 +127,8 @@ RasterizerOpenGL::~RasterizerOpenGL() { | |||
| 127 | } | 127 | } |
| 128 | } | 128 | } |
| 129 | 129 | ||
| 130 | void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { | 130 | std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, |
| 131 | GLintptr buffer_offset) { | ||
| 131 | MICROPROFILE_SCOPE(OpenGL_VAO); | 132 | MICROPROFILE_SCOPE(OpenGL_VAO); |
| 132 | const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; | 133 | const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; |
| 133 | const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; | 134 | const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; |
| @@ -136,43 +137,59 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { | |||
| 136 | state.draw.vertex_buffer = stream_buffer->GetHandle(); | 137 | state.draw.vertex_buffer = stream_buffer->GetHandle(); |
| 137 | state.Apply(); | 138 | state.Apply(); |
| 138 | 139 | ||
| 139 | // TODO(bunnei): Add support for 1+ vertex arrays | 140 | // Upload all guest vertex arrays sequentially to our buffer |
| 140 | const auto& vertex_array{regs.vertex_array[0]}; | 141 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { |
| 141 | const auto& vertex_array_limit{regs.vertex_array_limit[0]}; | 142 | const auto& vertex_array = regs.vertex_array[index]; |
| 142 | ASSERT_MSG(vertex_array.enable, "vertex array 0 is disabled?"); | 143 | if (!vertex_array.IsEnabled()) |
| 143 | ASSERT_MSG(!vertex_array.divisor, "vertex array 0 divisor is unimplemented!"); | 144 | continue; |
| 144 | for (unsigned index = 1; index < Maxwell::NumVertexArrays; ++index) { | 145 | |
| 145 | ASSERT_MSG(!regs.vertex_array[index].enable, "vertex array %d is unimplemented!", index); | 146 | const Tegra::GPUVAddr start = vertex_array.StartAddress(); |
| 147 | const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); | ||
| 148 | |||
| 149 | ASSERT(end > start); | ||
| 150 | u64 size = end - start + 1; | ||
| 151 | |||
| 152 | // Copy vertex array data | ||
| 153 | const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(start)}; | ||
| 154 | res_cache.FlushRegion(data_addr, size, nullptr); | ||
| 155 | Memory::ReadBlock(data_addr, array_ptr, size); | ||
| 156 | |||
| 157 | // Bind the vertex array to the buffer at the current offset. | ||
| 158 | glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride); | ||
| 159 | |||
| 160 | ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented"); | ||
| 161 | |||
| 162 | array_ptr += size; | ||
| 163 | buffer_offset += size; | ||
| 146 | } | 164 | } |
| 147 | 165 | ||
| 148 | // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. | 166 | // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. |
| 149 | // Enables the first 16 vertex attributes always, as we don't know which ones are actually used | 167 | // Enables the first 16 vertex attributes always, as we don't know which ones are actually used |
| 150 | // until shader time. Note, Tegra technically supports 32, but we're cappinig this to 16 for now | 168 | // until shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now |
| 151 | // to avoid OpenGL errors. | 169 | // to avoid OpenGL errors. |
| 170 | // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't | ||
| 171 | // assume every shader uses them all. | ||
| 152 | for (unsigned index = 0; index < 16; ++index) { | 172 | for (unsigned index = 0; index < 16; ++index) { |
| 153 | auto& attrib = regs.vertex_attrib_format[index]; | 173 | auto& attrib = regs.vertex_attrib_format[index]; |
| 154 | NGLOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", | 174 | NGLOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", |
| 155 | index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), | 175 | index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), |
| 156 | attrib.offset.Value(), attrib.IsNormalized()); | 176 | attrib.offset.Value(), attrib.IsNormalized()); |
| 157 | 177 | ||
| 158 | glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), | 178 | auto& buffer = regs.vertex_array[attrib.buffer]; |
| 159 | attrib.IsNormalized() ? GL_TRUE : GL_FALSE, vertex_array.stride, | 179 | ASSERT(buffer.IsEnabled()); |
| 160 | reinterpret_cast<GLvoid*>(buffer_offset + attrib.offset)); | 180 | |
| 161 | glEnableVertexAttribArray(index); | 181 | glEnableVertexAttribArray(index); |
| 182 | glVertexAttribFormat(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), | ||
| 183 | attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset); | ||
| 184 | glVertexAttribBinding(index, attrib.buffer); | ||
| 185 | |||
| 162 | hw_vao_enabled_attributes[index] = true; | 186 | hw_vao_enabled_attributes[index] = true; |
| 163 | } | 187 | } |
| 164 | 188 | ||
| 165 | // Copy vertex array data | 189 | return {array_ptr, buffer_offset}; |
| 166 | const u64 data_size{vertex_array_limit.LimitAddress() - vertex_array.StartAddress() + 1}; | ||
| 167 | const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(vertex_array.StartAddress())}; | ||
| 168 | res_cache.FlushRegion(data_addr, data_size, nullptr); | ||
| 169 | Memory::ReadBlock(data_addr, array_ptr, data_size); | ||
| 170 | |||
| 171 | array_ptr += data_size; | ||
| 172 | buffer_offset += data_size; | ||
| 173 | } | 190 | } |
| 174 | 191 | ||
| 175 | void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos) { | 192 | void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { |
| 176 | // Helper function for uploading uniform data | 193 | // Helper function for uploading uniform data |
| 177 | const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { | 194 | const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { |
| 178 | if (has_ARB_direct_state_access) { | 195 | if (has_ARB_direct_state_access) { |
| @@ -190,8 +207,6 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size | |||
| 190 | u32 current_constbuffer_bindpoint = 0; | 207 | u32 current_constbuffer_bindpoint = 0; |
| 191 | 208 | ||
| 192 | for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { | 209 | for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { |
| 193 | ptr_pos += sizeof(GLShader::MaxwellUniformData); | ||
| 194 | |||
| 195 | auto& shader_config = gpu.regs.shader_config[index]; | 210 | auto& shader_config = gpu.regs.shader_config[index]; |
| 196 | const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; | 211 | const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; |
| 197 | 212 | ||
| @@ -205,13 +220,16 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size | |||
| 205 | } | 220 | } |
| 206 | 221 | ||
| 207 | // Upload uniform data as one UBO per stage | 222 | // Upload uniform data as one UBO per stage |
| 208 | const GLintptr ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); | 223 | const GLintptr ubo_offset = buffer_offset; |
| 209 | copy_buffer(uniform_buffers[stage].handle, ubo_offset, | 224 | copy_buffer(uniform_buffers[stage].handle, ubo_offset, |
| 210 | sizeof(GLShader::MaxwellUniformData)); | 225 | sizeof(GLShader::MaxwellUniformData)); |
| 211 | GLShader::MaxwellUniformData* ub_ptr = | 226 | GLShader::MaxwellUniformData* ub_ptr = |
| 212 | reinterpret_cast<GLShader::MaxwellUniformData*>(&buffer_ptr[ptr_pos]); | 227 | reinterpret_cast<GLShader::MaxwellUniformData*>(buffer_ptr); |
| 213 | ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]); | 228 | ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]); |
| 214 | 229 | ||
| 230 | buffer_ptr += sizeof(GLShader::MaxwellUniformData); | ||
| 231 | buffer_offset += sizeof(GLShader::MaxwellUniformData); | ||
| 232 | |||
| 215 | // Fetch program code from memory | 233 | // Fetch program code from memory |
| 216 | GLShader::ProgramCode program_code; | 234 | GLShader::ProgramCode program_code; |
| 217 | const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; | 235 | const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; |
| @@ -252,6 +270,24 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size | |||
| 252 | shader_program_manager->UseTrivialGeometryShader(); | 270 | shader_program_manager->UseTrivialGeometryShader(); |
| 253 | } | 271 | } |
| 254 | 272 | ||
| 273 | size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | ||
| 274 | const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; | ||
| 275 | |||
| 276 | size_t size = 0; | ||
| 277 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 278 | if (!regs.vertex_array[index].IsEnabled()) | ||
| 279 | continue; | ||
| 280 | |||
| 281 | const Tegra::GPUVAddr start = regs.vertex_array[index].StartAddress(); | ||
| 282 | const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); | ||
| 283 | |||
| 284 | ASSERT(end > start); | ||
| 285 | size += end - start + 1; | ||
| 286 | } | ||
| 287 | |||
| 288 | return size; | ||
| 289 | } | ||
| 290 | |||
| 255 | bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { | 291 | bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { |
| 256 | accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; | 292 | accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; |
| 257 | DrawArrays(); | 293 | DrawArrays(); |
| @@ -329,44 +365,49 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 329 | const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; | 365 | const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; |
| 330 | const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; | 366 | const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; |
| 331 | 367 | ||
| 332 | // TODO(bunnei): Add support for 1+ vertex arrays | ||
| 333 | vs_input_size = vertex_num * regs.vertex_array[0].stride; | ||
| 334 | |||
| 335 | state.draw.vertex_buffer = stream_buffer->GetHandle(); | 368 | state.draw.vertex_buffer = stream_buffer->GetHandle(); |
| 336 | state.Apply(); | 369 | state.Apply(); |
| 337 | 370 | ||
| 338 | size_t buffer_size = static_cast<size_t>(vs_input_size); | 371 | size_t buffer_size = CalculateVertexArraysSize(); |
| 372 | |||
| 339 | if (is_indexed) { | 373 | if (is_indexed) { |
| 340 | buffer_size = Common::AlignUp(buffer_size, 4) + index_buffer_size; | 374 | buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + index_buffer_size; |
| 341 | } | 375 | } |
| 342 | 376 | ||
| 343 | // Uniform space for the 5 shader stages | 377 | // Uniform space for the 5 shader stages |
| 344 | buffer_size += sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; | 378 | buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + |
| 379 | sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; | ||
| 345 | 380 | ||
| 346 | size_t ptr_pos = 0; | ||
| 347 | u8* buffer_ptr; | 381 | u8* buffer_ptr; |
| 348 | GLintptr buffer_offset; | 382 | GLintptr buffer_offset; |
| 349 | std::tie(buffer_ptr, buffer_offset) = | 383 | std::tie(buffer_ptr, buffer_offset) = |
| 350 | stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4); | 384 | stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4); |
| 351 | 385 | ||
| 352 | SetupVertexArray(buffer_ptr, buffer_offset); | 386 | u8* offseted_buffer; |
| 353 | ptr_pos += vs_input_size; | 387 | std::tie(offseted_buffer, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset); |
| 388 | |||
| 389 | offseted_buffer = | ||
| 390 | reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4)); | ||
| 391 | buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4); | ||
| 354 | 392 | ||
| 355 | // If indexed mode, copy the index buffer | 393 | // If indexed mode, copy the index buffer |
| 356 | GLintptr index_buffer_offset = 0; | 394 | GLintptr index_buffer_offset = 0; |
| 357 | if (is_indexed) { | 395 | if (is_indexed) { |
| 358 | ptr_pos = Common::AlignUp(ptr_pos, 4); | ||
| 359 | |||
| 360 | const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; | 396 | const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; |
| 361 | const VAddr index_data_addr{ | 397 | const VAddr index_data_addr{ |
| 362 | memory_manager->PhysicalToVirtualAddress(regs.index_array.StartAddress())}; | 398 | memory_manager->PhysicalToVirtualAddress(regs.index_array.StartAddress())}; |
| 363 | Memory::ReadBlock(index_data_addr, &buffer_ptr[ptr_pos], index_buffer_size); | 399 | Memory::ReadBlock(index_data_addr, offseted_buffer, index_buffer_size); |
| 364 | 400 | ||
| 365 | index_buffer_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); | 401 | index_buffer_offset = buffer_offset; |
| 366 | ptr_pos += index_buffer_size; | 402 | offseted_buffer += index_buffer_size; |
| 403 | buffer_offset += index_buffer_size; | ||
| 367 | } | 404 | } |
| 368 | 405 | ||
| 369 | SetupShaders(buffer_ptr, buffer_offset, ptr_pos); | 406 | offseted_buffer = |
| 407 | reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4)); | ||
| 408 | buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4); | ||
| 409 | |||
| 410 | SetupShaders(offseted_buffer, buffer_offset); | ||
| 370 | 411 | ||
| 371 | stream_buffer->Unmap(); | 412 | stream_buffer->Unmap(); |
| 372 | 413 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 03e02b52a..544714b95 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -148,13 +148,13 @@ private: | |||
| 148 | static constexpr size_t STREAM_BUFFER_SIZE = 4 * 1024 * 1024; | 148 | static constexpr size_t STREAM_BUFFER_SIZE = 4 * 1024 * 1024; |
| 149 | std::unique_ptr<OGLStreamBuffer> stream_buffer; | 149 | std::unique_ptr<OGLStreamBuffer> stream_buffer; |
| 150 | 150 | ||
| 151 | GLsizeiptr vs_input_size; | 151 | size_t CalculateVertexArraysSize() const; |
| 152 | 152 | ||
| 153 | void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset); | 153 | std::pair<u8*, GLintptr> SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset); |
| 154 | 154 | ||
| 155 | std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers; | 155 | std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers; |
| 156 | 156 | ||
| 157 | void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos); | 157 | void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset); |
| 158 | 158 | ||
| 159 | enum class AccelDraw { Disabled, Arrays, Indexed }; | 159 | enum class AccelDraw { Disabled, Arrays, Indexed }; |
| 160 | AccelDraw accelerate_draw; | 160 | AccelDraw accelerate_draw; |