diff options
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 102 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 29 | ||||
| -rw-r--r-- | src/video_core/macro_interpreter.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 181 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 10 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.cpp | 12 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 10 |
9 files changed, 273 insertions, 91 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index fb3d1112c..b318aedb8 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
| @@ -92,6 +92,10 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 92 | 92 | ||
| 93 | // Some games (like Super Mario Odyssey) assume that SRGB is enabled. | 93 | // Some games (like Super Mario Odyssey) assume that SRGB is enabled. |
| 94 | regs.framebuffer_srgb = 1; | 94 | regs.framebuffer_srgb = 1; |
| 95 | mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true; | ||
| 96 | mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true; | ||
| 97 | mme_inline[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true; | ||
| 98 | mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true; | ||
| 95 | } | 99 | } |
| 96 | 100 | ||
| 97 | #define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name)) | 101 | #define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name)) |
| @@ -256,6 +260,9 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3 | |||
| 256 | 260 | ||
| 257 | // Execute the current macro. | 261 | // Execute the current macro. |
| 258 | macro_interpreter.Execute(macro_positions[entry], num_parameters, parameters); | 262 | macro_interpreter.Execute(macro_positions[entry], num_parameters, parameters); |
| 263 | if (mme_draw.current_mode != MMEDrawMode::Undefined) { | ||
| 264 | FlushMMEInlineDraw(); | ||
| 265 | } | ||
| 259 | } | 266 | } |
| 260 | 267 | ||
| 261 | void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | 268 | void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { |
| @@ -416,6 +423,97 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 416 | } | 423 | } |
| 417 | } | 424 | } |
| 418 | 425 | ||
| 426 | void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) { | ||
| 427 | if (mme_draw.current_mode == MMEDrawMode::Undefined) { | ||
| 428 | if (mme_draw.gl_begin_consume) { | ||
| 429 | mme_draw.current_mode = expected_mode; | ||
| 430 | mme_draw.current_count = count; | ||
| 431 | mme_draw.instance_count = 1; | ||
| 432 | mme_draw.gl_begin_consume = false; | ||
| 433 | mme_draw.gl_end_count = 0; | ||
| 434 | } | ||
| 435 | return; | ||
| 436 | } else { | ||
| 437 | if (mme_draw.current_mode == expected_mode && count == mme_draw.current_count && | ||
| 438 | mme_draw.instance_mode && mme_draw.gl_begin_consume) { | ||
| 439 | mme_draw.instance_count++; | ||
| 440 | mme_draw.gl_begin_consume = false; | ||
| 441 | return; | ||
| 442 | } else { | ||
| 443 | FlushMMEInlineDraw(); | ||
| 444 | } | ||
| 445 | } | ||
| 446 | // Tail call in case it needs to retry. | ||
| 447 | StepInstance(expected_mode, count); | ||
| 448 | } | ||
| 449 | |||
| 450 | void Maxwell3D::CallMethodFromMME(const GPU::MethodCall& method_call) { | ||
| 451 | const u32 method = method_call.method; | ||
| 452 | if (mme_inline[method]) { | ||
| 453 | regs.reg_array[method] = method_call.argument; | ||
| 454 | if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count) || | ||
| 455 | method == MAXWELL3D_REG_INDEX(index_array.count)) { | ||
| 456 | const MMEDrawMode expected_mode = method == MAXWELL3D_REG_INDEX(vertex_buffer.count) | ||
| 457 | ? MMEDrawMode::Array | ||
| 458 | : MMEDrawMode::Indexed; | ||
| 459 | StepInstance(expected_mode, method_call.argument); | ||
| 460 | } else if (method == MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)) { | ||
| 461 | mme_draw.instance_mode = | ||
| 462 | (regs.draw.instance_next != 0) || (regs.draw.instance_cont != 0); | ||
| 463 | mme_draw.gl_begin_consume = true; | ||
| 464 | } else { | ||
| 465 | mme_draw.gl_end_count++; | ||
| 466 | } | ||
| 467 | } else { | ||
| 468 | if (mme_draw.current_mode != MMEDrawMode::Undefined) { | ||
| 469 | FlushMMEInlineDraw(); | ||
| 470 | } | ||
| 471 | CallMethod(method_call); | ||
| 472 | } | ||
| 473 | } | ||
| 474 | |||
| 475 | void Maxwell3D::FlushMMEInlineDraw() { | ||
| 476 | LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()), | ||
| 477 | regs.vertex_buffer.count); | ||
| 478 | ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); | ||
| 479 | ASSERT(mme_draw.instance_count == mme_draw.gl_end_count); | ||
| 480 | |||
| 481 | auto debug_context = system.GetGPUDebugContext(); | ||
| 482 | |||
| 483 | if (debug_context) { | ||
| 484 | debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr); | ||
| 485 | } | ||
| 486 | |||
| 487 | // Both instance configuration registers can not be set at the same time. | ||
| 488 | ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont, | ||
| 489 | "Illegal combination of instancing parameters"); | ||
| 490 | |||
| 491 | const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed; | ||
| 492 | if (ShouldExecute()) { | ||
| 493 | rasterizer.DrawMultiBatch(is_indexed); | ||
| 494 | } | ||
| 495 | |||
| 496 | if (debug_context) { | ||
| 497 | debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr); | ||
| 498 | } | ||
| 499 | |||
| 500 | // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if | ||
| 501 | // the game is trying to draw indexed or direct mode. This needs to be verified on HW still - | ||
| 502 | // it's possible that it is incorrect and that there is some other register used to specify the | ||
| 503 | // drawing mode. | ||
| 504 | if (is_indexed) { | ||
| 505 | regs.index_array.count = 0; | ||
| 506 | } else { | ||
| 507 | regs.vertex_buffer.count = 0; | ||
| 508 | } | ||
| 509 | mme_draw.current_mode = MMEDrawMode::Undefined; | ||
| 510 | mme_draw.current_count = 0; | ||
| 511 | mme_draw.instance_count = 0; | ||
| 512 | mme_draw.instance_mode = false; | ||
| 513 | mme_draw.gl_begin_consume = false; | ||
| 514 | mme_draw.gl_end_count = 0; | ||
| 515 | } | ||
| 516 | |||
| 419 | void Maxwell3D::ProcessMacroUpload(u32 data) { | 517 | void Maxwell3D::ProcessMacroUpload(u32 data) { |
| 420 | ASSERT_MSG(regs.macros.upload_address < macro_memory.size(), | 518 | ASSERT_MSG(regs.macros.upload_address < macro_memory.size(), |
| 421 | "upload_address exceeded macro_memory size!"); | 519 | "upload_address exceeded macro_memory size!"); |
| @@ -564,7 +662,9 @@ void Maxwell3D::DrawArrays() { | |||
| 564 | } | 662 | } |
| 565 | 663 | ||
| 566 | const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count}; | 664 | const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count}; |
| 567 | rasterizer.AccelerateDrawBatch(is_indexed); | 665 | if (ShouldExecute()) { |
| 666 | rasterizer.DrawBatch(is_indexed); | ||
| 667 | } | ||
| 568 | 668 | ||
| 569 | if (debug_context) { | 669 | if (debug_context) { |
| 570 | debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr); | 670 | debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr); |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index e5ec90717..4c97759ed 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
| @@ -811,8 +811,9 @@ public: | |||
| 811 | INSERT_PADDING_WORDS(0x21); | 811 | INSERT_PADDING_WORDS(0x21); |
| 812 | 812 | ||
| 813 | u32 vb_element_base; | 813 | u32 vb_element_base; |
| 814 | u32 vb_base_instance; | ||
| 814 | 815 | ||
| 815 | INSERT_PADDING_WORDS(0x36); | 816 | INSERT_PADDING_WORDS(0x35); |
| 816 | 817 | ||
| 817 | union { | 818 | union { |
| 818 | BitField<0, 1, u32> c0; | 819 | BitField<0, 1, u32> c0; |
| @@ -1238,6 +1239,11 @@ public: | |||
| 1238 | /// Write the value to the register identified by method. | 1239 | /// Write the value to the register identified by method. |
| 1239 | void CallMethod(const GPU::MethodCall& method_call); | 1240 | void CallMethod(const GPU::MethodCall& method_call); |
| 1240 | 1241 | ||
| 1242 | /// Write the value to the register identified by method, for method calls sent by the macro interpreter (MME). | ||
| 1243 | void CallMethodFromMME(const GPU::MethodCall& method_call); | ||
| 1244 | |||
| 1245 | void FlushMMEInlineDraw(); | ||
| 1246 | |||
| 1241 | /// Given a Texture Handle, returns the TSC and TIC entries. | 1247 | /// Given a Texture Handle, returns the TSC and TIC entries. |
| 1242 | Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, | 1248 | Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, |
| 1243 | std::size_t offset) const; | 1249 | std::size_t offset) const; |
| @@ -1263,6 +1269,21 @@ public: | |||
| 1263 | return execute_on; | 1269 | return execute_on; |
| 1264 | } | 1270 | } |
| 1265 | 1271 | ||
| 1272 | enum class MMEDrawMode : u32 { | ||
| 1273 | Undefined, | ||
| 1274 | Array, | ||
| 1275 | Indexed, | ||
| 1276 | }; | ||
| 1277 | |||
| 1278 | struct MMEDrawState { | ||
| 1279 | MMEDrawMode current_mode{MMEDrawMode::Undefined}; | ||
| 1280 | u32 current_count{}; | ||
| 1281 | u32 instance_count{}; | ||
| 1282 | bool instance_mode{}; | ||
| 1283 | bool gl_begin_consume{}; | ||
| 1284 | u32 gl_end_count{}; | ||
| 1285 | } mme_draw; | ||
| 1286 | |||
| 1266 | private: | 1287 | private: |
| 1267 | void InitializeRegisterDefaults(); | 1288 | void InitializeRegisterDefaults(); |
| 1268 | 1289 | ||
| @@ -1275,6 +1296,8 @@ private: | |||
| 1275 | /// Start offsets of each macro in macro_memory | 1296 | /// Start offsets of each macro in macro_memory |
| 1276 | std::array<u32, 0x80> macro_positions = {}; | 1297 | std::array<u32, 0x80> macro_positions = {}; |
| 1277 | 1298 | ||
| 1299 | std::array<bool, Regs::NUM_REGS> mme_inline{}; | ||
| 1300 | |||
| 1278 | /// Memory for macro code | 1301 | /// Memory for macro code |
| 1279 | MacroMemory macro_memory; | 1302 | MacroMemory macro_memory; |
| 1280 | 1303 | ||
| @@ -1346,6 +1369,9 @@ private: | |||
| 1346 | 1369 | ||
| 1347 | /// Handles a write to the VERTEX_END_GL register, triggering a draw. | 1370 | /// Handles a write to the VERTEX_END_GL register, triggering a draw. |
| 1348 | void DrawArrays(); | 1371 | void DrawArrays(); |
| 1372 | |||
| 1373 | // Handles an instance draw call from the MME | ||
| 1374 | void StepInstance(MMEDrawMode expected_mode, u32 count); | ||
| 1349 | }; | 1375 | }; |
| 1350 | 1376 | ||
| 1351 | #define ASSERT_REG_POSITION(field_name, position) \ | 1377 | #define ASSERT_REG_POSITION(field_name, position) \ |
| @@ -1402,6 +1428,7 @@ ASSERT_REG_POSITION(stencil_front_mask, 0x4E7); | |||
| 1402 | ASSERT_REG_POSITION(frag_color_clamp, 0x4EA); | 1428 | ASSERT_REG_POSITION(frag_color_clamp, 0x4EA); |
| 1403 | ASSERT_REG_POSITION(screen_y_control, 0x4EB); | 1429 | ASSERT_REG_POSITION(screen_y_control, 0x4EB); |
| 1404 | ASSERT_REG_POSITION(vb_element_base, 0x50D); | 1430 | ASSERT_REG_POSITION(vb_element_base, 0x50D); |
| 1431 | ASSERT_REG_POSITION(vb_base_instance, 0x50E); | ||
| 1405 | ASSERT_REG_POSITION(clip_distance_enabled, 0x544); | 1432 | ASSERT_REG_POSITION(clip_distance_enabled, 0x544); |
| 1406 | ASSERT_REG_POSITION(point_size, 0x546); | 1433 | ASSERT_REG_POSITION(point_size, 0x546); |
| 1407 | ASSERT_REG_POSITION(zeta_enable, 0x54E); | 1434 | ASSERT_REG_POSITION(zeta_enable, 0x54E); |
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index 62afc0d11..dbaeac6db 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
| @@ -257,7 +257,7 @@ void MacroInterpreter::SetMethodAddress(u32 address) { | |||
| 257 | } | 257 | } |
| 258 | 258 | ||
| 259 | void MacroInterpreter::Send(u32 value) { | 259 | void MacroInterpreter::Send(u32 value) { |
| 260 | maxwell3d.CallMethod({method_address.address, value}); | 260 | maxwell3d.CallMethodFromMME({method_address.address, value}); |
| 261 | // Increment the method address by the method increment. | 261 | // Increment the method address by the method increment. |
| 262 | method_address.address.Assign(method_address.address.Value() + | 262 | method_address.address.Assign(method_address.address.Value() + |
| 263 | method_address.increment.Value()); | 263 | method_address.increment.Value()); |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 6b3f2d50a..5b0eca9e2 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
| @@ -29,7 +29,10 @@ public: | |||
| 29 | virtual ~RasterizerInterface() {} | 29 | virtual ~RasterizerInterface() {} |
| 30 | 30 | ||
| 31 | /// Draw the current batch of vertex arrays | 31 | /// Draw the current batch of vertex arrays |
| 32 | virtual void DrawArrays() = 0; | 32 | virtual bool DrawBatch(bool is_indexed) = 0; |
| 33 | |||
| 34 | /// Draw the current batch of multiple instances of vertex arrays | ||
| 35 | virtual bool DrawMultiBatch(bool is_indexed) = 0; | ||
| 33 | 36 | ||
| 34 | /// Clear the current framebuffer | 37 | /// Clear the current framebuffer |
| 35 | virtual void Clear() = 0; | 38 | virtual void Clear() = 0; |
| @@ -69,10 +72,6 @@ public: | |||
| 69 | return false; | 72 | return false; |
| 70 | } | 73 | } |
| 71 | 74 | ||
| 72 | virtual bool AccelerateDrawBatch(bool is_indexed) { | ||
| 73 | return false; | ||
| 74 | } | ||
| 75 | |||
| 76 | /// Increase/decrease the number of object in pages touching the specified region | 75 | /// Increase/decrease the number of object in pages touching the specified region |
| 77 | virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {} | 76 | virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {} |
| 78 | 77 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index a2c1473db..6a17bed72 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
| @@ -49,40 +49,6 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192)); | |||
| 49 | MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); | 49 | MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); |
| 50 | MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100)); | 50 | MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100)); |
| 51 | 51 | ||
| 52 | struct DrawParameters { | ||
| 53 | GLenum primitive_mode; | ||
| 54 | GLsizei count; | ||
| 55 | GLint current_instance; | ||
| 56 | bool use_indexed; | ||
| 57 | |||
| 58 | GLint vertex_first; | ||
| 59 | |||
| 60 | GLenum index_format; | ||
| 61 | GLint base_vertex; | ||
| 62 | GLintptr index_buffer_offset; | ||
| 63 | |||
| 64 | void DispatchDraw() const { | ||
| 65 | if (use_indexed) { | ||
| 66 | const auto index_buffer_ptr = reinterpret_cast<const void*>(index_buffer_offset); | ||
| 67 | if (current_instance > 0) { | ||
| 68 | glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format, | ||
| 69 | index_buffer_ptr, 1, base_vertex, | ||
| 70 | current_instance); | ||
| 71 | } else { | ||
| 72 | glDrawElementsBaseVertex(primitive_mode, count, index_format, index_buffer_ptr, | ||
| 73 | base_vertex); | ||
| 74 | } | ||
| 75 | } else { | ||
| 76 | if (current_instance > 0) { | ||
| 77 | glDrawArraysInstancedBaseInstance(primitive_mode, vertex_first, count, 1, | ||
| 78 | current_instance); | ||
| 79 | } else { | ||
| 80 | glDrawArrays(primitive_mode, vertex_first, count); | ||
| 81 | } | ||
| 82 | } | ||
| 83 | } | ||
| 84 | }; | ||
| 85 | |||
| 86 | static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, | 52 | static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, |
| 87 | const GLShader::ConstBufferEntry& entry) { | 53 | const GLShader::ConstBufferEntry& entry) { |
| 88 | if (!entry.IsIndirect()) { | 54 | if (!entry.IsIndirect()) { |
| @@ -270,29 +236,6 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { | |||
| 270 | return offset; | 236 | return offset; |
| 271 | } | 237 | } |
| 272 | 238 | ||
| 273 | DrawParameters RasterizerOpenGL::SetupDraw(GLintptr index_buffer_offset) { | ||
| 274 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 275 | const auto& regs = gpu.regs; | ||
| 276 | const bool is_indexed = accelerate_draw == AccelDraw::Indexed; | ||
| 277 | |||
| 278 | DrawParameters params{}; | ||
| 279 | params.current_instance = gpu.state.current_instance; | ||
| 280 | |||
| 281 | params.use_indexed = is_indexed; | ||
| 282 | params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); | ||
| 283 | |||
| 284 | if (is_indexed) { | ||
| 285 | params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); | ||
| 286 | params.count = regs.index_array.count; | ||
| 287 | params.index_buffer_offset = index_buffer_offset; | ||
| 288 | params.base_vertex = static_cast<GLint>(regs.vb_element_base); | ||
| 289 | } else { | ||
| 290 | params.count = regs.vertex_buffer.count; | ||
| 291 | params.vertex_first = regs.vertex_buffer.first; | ||
| 292 | } | ||
| 293 | return params; | ||
| 294 | } | ||
| 295 | |||
| 296 | void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | 239 | void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { |
| 297 | MICROPROFILE_SCOPE(OpenGL_Shader); | 240 | MICROPROFILE_SCOPE(OpenGL_Shader); |
| 298 | auto& gpu = system.GPU().Maxwell3D(); | 241 | auto& gpu = system.GPU().Maxwell3D(); |
| @@ -399,12 +342,6 @@ std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const { | |||
| 399 | static_cast<std::size_t>(regs.index_array.FormatSizeInBytes()); | 342 | static_cast<std::size_t>(regs.index_array.FormatSizeInBytes()); |
| 400 | } | 343 | } |
| 401 | 344 | ||
| 402 | bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { | ||
| 403 | accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; | ||
| 404 | DrawArrays(); | ||
| 405 | return true; | ||
| 406 | } | ||
| 407 | |||
| 408 | template <typename Map, typename Interval> | 345 | template <typename Map, typename Interval> |
| 409 | static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { | 346 | static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { |
| 410 | return boost::make_iterator_range(map.equal_range(interval)); | 347 | return boost::make_iterator_range(map.equal_range(interval)); |
| @@ -640,17 +577,9 @@ void RasterizerOpenGL::Clear() { | |||
| 640 | } | 577 | } |
| 641 | } | 578 | } |
| 642 | 579 | ||
| 643 | void RasterizerOpenGL::DrawArrays() { | 580 | void RasterizerOpenGL::DrawPrelude() { |
| 644 | if (accelerate_draw == AccelDraw::Disabled) | ||
| 645 | return; | ||
| 646 | |||
| 647 | MICROPROFILE_SCOPE(OpenGL_Drawing); | ||
| 648 | auto& gpu = system.GPU().Maxwell3D(); | 581 | auto& gpu = system.GPU().Maxwell3D(); |
| 649 | 582 | ||
| 650 | if (!gpu.ShouldExecute()) { | ||
| 651 | return; | ||
| 652 | } | ||
| 653 | |||
| 654 | SyncColorMask(); | 583 | SyncColorMask(); |
| 655 | SyncFragmentColorClampState(); | 584 | SyncFragmentColorClampState(); |
| 656 | SyncMultiSampleState(); | 585 | SyncMultiSampleState(); |
| @@ -695,10 +624,7 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 695 | // Upload vertex and index data. | 624 | // Upload vertex and index data. |
| 696 | SetupVertexBuffer(vao); | 625 | SetupVertexBuffer(vao); |
| 697 | SetupVertexInstances(vao); | 626 | SetupVertexInstances(vao); |
| 698 | const GLintptr index_buffer_offset = SetupIndexBuffer(); | 627 | index_buffer_offset = SetupIndexBuffer(); |
| 699 | |||
| 700 | // Setup draw parameters. It will automatically choose what glDraw* method to use. | ||
| 701 | const DrawParameters params = SetupDraw(index_buffer_offset); | ||
| 702 | 628 | ||
| 703 | // Prepare packed bindings. | 629 | // Prepare packed bindings. |
| 704 | bind_ubo_pushbuffer.Setup(0); | 630 | bind_ubo_pushbuffer.Setup(0); |
| @@ -706,7 +632,8 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 706 | 632 | ||
| 707 | // Setup shaders and their used resources. | 633 | // Setup shaders and their used resources. |
| 708 | texture_cache.GuardSamplers(true); | 634 | texture_cache.GuardSamplers(true); |
| 709 | SetupShaders(params.primitive_mode); | 635 | const auto primitive_mode = MaxwellToGL::PrimitiveTopology(gpu.regs.draw.topology); |
| 636 | SetupShaders(primitive_mode); | ||
| 710 | texture_cache.GuardSamplers(false); | 637 | texture_cache.GuardSamplers(false); |
| 711 | 638 | ||
| 712 | ConfigureFramebuffers(); | 639 | ConfigureFramebuffers(); |
| @@ -730,11 +657,107 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 730 | if (texture_cache.TextureBarrier()) { | 657 | if (texture_cache.TextureBarrier()) { |
| 731 | glTextureBarrier(); | 658 | glTextureBarrier(); |
| 732 | } | 659 | } |
| 660 | } | ||
| 661 | |||
| 662 | struct DrawParams { | ||
| 663 | bool is_indexed{}; | ||
| 664 | bool is_instanced{}; | ||
| 665 | GLenum primitive_mode{}; | ||
| 666 | GLint count{}; | ||
| 667 | GLint base_vertex{}; | ||
| 668 | |||
| 669 | // Indexed settings | ||
| 670 | GLenum index_format{}; | ||
| 671 | GLintptr index_buffer_offset{}; | ||
| 672 | |||
| 673 | // Instanced setting | ||
| 674 | GLint num_instances{}; | ||
| 675 | GLint base_instance{}; | ||
| 676 | |||
| 677 | void DispatchDraw() { | ||
| 678 | if (is_indexed) { | ||
| 679 | const auto index_buffer_ptr = reinterpret_cast<const void*>(index_buffer_offset); | ||
| 680 | if (is_instanced) { | ||
| 681 | glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format, | ||
| 682 | index_buffer_ptr, num_instances, | ||
| 683 | base_vertex, base_instance); | ||
| 684 | } else { | ||
| 685 | glDrawElementsBaseVertex(primitive_mode, count, index_format, index_buffer_ptr, | ||
| 686 | base_vertex); | ||
| 687 | } | ||
| 688 | } else { | ||
| 689 | if (is_instanced) { | ||
| 690 | glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, count, num_instances, | ||
| 691 | base_instance); | ||
| 692 | } else { | ||
| 693 | glDrawArrays(primitive_mode, base_vertex, count); | ||
| 694 | } | ||
| 695 | } | ||
| 696 | } | ||
| 697 | }; | ||
| 733 | 698 | ||
| 734 | params.DispatchDraw(); | 699 | bool RasterizerOpenGL::DrawBatch(bool is_indexed) { |
| 700 | accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; | ||
| 701 | |||
| 702 | MICROPROFILE_SCOPE(OpenGL_Drawing); | ||
| 735 | 703 | ||
| 704 | DrawPrelude(); | ||
| 705 | |||
| 706 | auto& maxwell3d = system.GPU().Maxwell3D(); | ||
| 707 | const auto& regs = maxwell3d.regs; | ||
| 708 | const auto current_instance = maxwell3d.state.current_instance; | ||
| 709 | DrawParams draw_call{}; | ||
| 710 | draw_call.is_indexed = is_indexed; | ||
| 711 | draw_call.num_instances = static_cast<GLint>(1); | ||
| 712 | draw_call.base_instance = static_cast<GLint>(current_instance); | ||
| 713 | draw_call.is_instanced = current_instance > 0; | ||
| 714 | draw_call.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); | ||
| 715 | if (draw_call.is_indexed) { | ||
| 716 | draw_call.count = static_cast<GLint>(regs.index_array.count); | ||
| 717 | draw_call.base_vertex = static_cast<GLint>(regs.vb_element_base); | ||
| 718 | draw_call.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); | ||
| 719 | draw_call.index_buffer_offset = index_buffer_offset; | ||
| 720 | } else { | ||
| 721 | draw_call.count = static_cast<GLint>(regs.vertex_buffer.count); | ||
| 722 | draw_call.base_vertex = static_cast<GLint>(regs.vertex_buffer.first); | ||
| 723 | } | ||
| 724 | draw_call.DispatchDraw(); | ||
| 725 | |||
| 726 | maxwell3d.dirty.memory_general = false; | ||
| 736 | accelerate_draw = AccelDraw::Disabled; | 727 | accelerate_draw = AccelDraw::Disabled; |
| 737 | gpu.dirty.memory_general = false; | 728 | return true; |
| 729 | } | ||
| 730 | |||
| 731 | bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) { | ||
| 732 | accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; | ||
| 733 | |||
| 734 | MICROPROFILE_SCOPE(OpenGL_Drawing); | ||
| 735 | |||
| 736 | DrawPrelude(); | ||
| 737 | |||
| 738 | auto& maxwell3d = system.GPU().Maxwell3D(); | ||
| 739 | const auto& regs = maxwell3d.regs; | ||
| 740 | const auto& draw_setup = maxwell3d.mme_draw; | ||
| 741 | DrawParams draw_call{}; | ||
| 742 | draw_call.is_indexed = is_indexed; | ||
| 743 | draw_call.num_instances = static_cast<GLint>(draw_setup.instance_count); | ||
| 744 | draw_call.base_instance = static_cast<GLint>(regs.vb_base_instance); | ||
| 745 | draw_call.is_instanced = draw_setup.instance_count > 1; | ||
| 746 | draw_call.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); | ||
| 747 | if (draw_call.is_indexed) { | ||
| 748 | draw_call.count = static_cast<GLint>(regs.index_array.count); | ||
| 749 | draw_call.base_vertex = static_cast<GLint>(regs.vb_element_base); | ||
| 750 | draw_call.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); | ||
| 751 | draw_call.index_buffer_offset = index_buffer_offset; | ||
| 752 | } else { | ||
| 753 | draw_call.count = static_cast<GLint>(regs.vertex_buffer.count); | ||
| 754 | draw_call.base_vertex = static_cast<GLint>(regs.vertex_buffer.first); | ||
| 755 | } | ||
| 756 | draw_call.DispatchDraw(); | ||
| 757 | |||
| 758 | maxwell3d.dirty.memory_general = false; | ||
| 759 | accelerate_draw = AccelDraw::Disabled; | ||
| 760 | return true; | ||
| 738 | } | 761 | } |
| 739 | 762 | ||
| 740 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | 763 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 4f5c7f864..9c10ebda3 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
| @@ -57,7 +57,8 @@ public: | |||
| 57 | ScreenInfo& info); | 57 | ScreenInfo& info); |
| 58 | ~RasterizerOpenGL() override; | 58 | ~RasterizerOpenGL() override; |
| 59 | 59 | ||
| 60 | void DrawArrays() override; | 60 | bool DrawBatch(bool is_indexed) override; |
| 61 | bool DrawMultiBatch(bool is_indexed) override; | ||
| 61 | void Clear() override; | 62 | void Clear() override; |
| 62 | void DispatchCompute(GPUVAddr code_addr) override; | 63 | void DispatchCompute(GPUVAddr code_addr) override; |
| 63 | void FlushAll() override; | 64 | void FlushAll() override; |
| @@ -71,7 +72,6 @@ public: | |||
| 71 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | 72 | const Tegra::Engines::Fermi2D::Config& copy_config) override; |
| 72 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 73 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |
| 73 | u32 pixel_stride) override; | 74 | u32 pixel_stride) override; |
| 74 | bool AccelerateDrawBatch(bool is_indexed) override; | ||
| 75 | void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; | 75 | void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; |
| 76 | void LoadDiskResources(const std::atomic_bool& stop_loading, | 76 | void LoadDiskResources(const std::atomic_bool& stop_loading, |
| 77 | const VideoCore::DiskResourceLoadCallback& callback) override; | 77 | const VideoCore::DiskResourceLoadCallback& callback) override; |
| @@ -105,6 +105,9 @@ private: | |||
| 105 | void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, | 105 | void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, |
| 106 | std::size_t size); | 106 | std::size_t size); |
| 107 | 107 | ||
| 108 | /// Syncs all the state, shaders, render targets and texture settings before a draw call. | ||
| 109 | void DrawPrelude(); | ||
| 110 | |||
| 108 | /// Configures the current textures to use for the draw command. Returns shaders texture buffer | 111 | /// Configures the current textures to use for the draw command. Returns shaders texture buffer |
| 109 | /// usage. | 112 | /// usage. |
| 110 | TextureBufferUsage SetupDrawTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 113 | TextureBufferUsage SetupDrawTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| @@ -216,7 +219,7 @@ private: | |||
| 216 | 219 | ||
| 217 | GLintptr SetupIndexBuffer(); | 220 | GLintptr SetupIndexBuffer(); |
| 218 | 221 | ||
| 219 | DrawParameters SetupDraw(GLintptr index_buffer_offset); | 222 | GLintptr index_buffer_offset; |
| 220 | 223 | ||
| 221 | void SetupShaders(GLenum primitive_mode); | 224 | void SetupShaders(GLenum primitive_mode); |
| 222 | 225 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 76439e7ab..74cb59bc1 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
| @@ -462,6 +462,14 @@ private: | |||
| 462 | code.AddLine("float gl_PointSize;"); | 462 | code.AddLine("float gl_PointSize;"); |
| 463 | } | 463 | } |
| 464 | 464 | ||
| 465 | if (ir.UsesInstanceId()) { | ||
| 466 | code.AddLine("int gl_InstanceID;"); | ||
| 467 | } | ||
| 468 | |||
| 469 | if (ir.UsesVertexId()) { | ||
| 470 | code.AddLine("int gl_VertexID;"); | ||
| 471 | } | ||
| 472 | |||
| 465 | --code.scope; | 473 | --code.scope; |
| 466 | code.AddLine("}};"); | 474 | code.AddLine("}};"); |
| 467 | code.AddNewLine(); | 475 | code.AddNewLine(); |
| @@ -964,7 +972,7 @@ private: | |||
| 964 | switch (element) { | 972 | switch (element) { |
| 965 | case 2: | 973 | case 2: |
| 966 | // Config pack's first value is instance_id. | 974 | // Config pack's first value is instance_id. |
| 967 | return {"config_pack[0]", Type::Uint}; | 975 | return {"gl_InstanceID", Type::Int}; |
| 968 | case 3: | 976 | case 3: |
| 969 | return {"gl_VertexID", Type::Int}; | 977 | return {"gl_VertexID", Type::Int}; |
| 970 | } | 978 | } |
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index bbbab0bca..2c357f310 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
| @@ -114,6 +114,18 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff | |||
| 114 | break; | 114 | break; |
| 115 | } | 115 | } |
| 116 | } | 116 | } |
| 117 | if (index == Attribute::Index::TessCoordInstanceIDVertexID) { | ||
| 118 | switch (element) { | ||
| 119 | case 2: | ||
| 120 | uses_instance_id = true; | ||
| 121 | break; | ||
| 122 | case 3: | ||
| 123 | uses_vertex_id = true; | ||
| 124 | break; | ||
| 125 | default: | ||
| 126 | break; | ||
| 127 | } | ||
| 128 | } | ||
| 117 | if (index == Attribute::Index::ClipDistances0123 || | 129 | if (index == Attribute::Index::ClipDistances0123 || |
| 118 | index == Attribute::Index::ClipDistances4567) { | 130 | index == Attribute::Index::ClipDistances4567) { |
| 119 | const auto clip_index = | 131 | const auto clip_index = |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 6aed9bb84..2f03d83ba 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
| @@ -124,6 +124,14 @@ public: | |||
| 124 | return uses_point_size; | 124 | return uses_point_size; |
| 125 | } | 125 | } |
| 126 | 126 | ||
| 127 | bool UsesInstanceId() const { | ||
| 128 | return uses_instance_id; | ||
| 129 | } | ||
| 130 | |||
| 131 | bool UsesVertexId() const { | ||
| 132 | return uses_vertex_id; | ||
| 133 | } | ||
| 134 | |||
| 127 | bool HasPhysicalAttributes() const { | 135 | bool HasPhysicalAttributes() const { |
| 128 | return uses_physical_attributes; | 136 | return uses_physical_attributes; |
| 129 | } | 137 | } |
| @@ -373,6 +381,8 @@ private: | |||
| 373 | bool uses_viewport_index{}; | 381 | bool uses_viewport_index{}; |
| 374 | bool uses_point_size{}; | 382 | bool uses_point_size{}; |
| 375 | bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes | 383 | bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes |
| 384 | bool uses_instance_id{}; | ||
| 385 | bool uses_vertex_id{}; | ||
| 376 | 386 | ||
| 377 | Tegra::Shader::Header header; | 387 | Tegra::Shader::Header header; |
| 378 | }; | 388 | }; |