summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/video_core/engines/maxwell_3d.cpp74
-rw-r--r--src/video_core/engines/maxwell_3d.h23
-rw-r--r--src/video_core/macro_interpreter.cpp2
-rw-r--r--src/video_core/rasterizer_interface.h7
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp89
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp2
7 files changed, 192 insertions, 11 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index c7a3c85a0..48fc1a9e1 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -92,6 +92,10 @@ void Maxwell3D::InitializeRegisterDefaults() {
92 92
93 // Some games (like Super Mario Odyssey) assume that SRGB is enabled. 93 // Some games (like Super Mario Odyssey) assume that SRGB is enabled.
94 regs.framebuffer_srgb = 1; 94 regs.framebuffer_srgb = 1;
95 mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true;
96 mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true;
97 mme_inline[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true;
98 mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
95} 99}
96 100
97#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name)) 101#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name))
@@ -416,6 +420,76 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
416 } 420 }
417} 421}
418 422
423void Maxwell3D::CallMethodFromMME(const GPU::MethodCall& method_call) {
424 const u32 method = method_call.method;
425 if (mme_inline[method]) {
426 regs.reg_array[method] = method_call.argument;
427 if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count) ||
428 method == MAXWELL3D_REG_INDEX(index_array.count)) {
429 MMMEDrawMode expected_mode = method == MAXWELL3D_REG_INDEX(vertex_buffer.count)
430 ? MMMEDrawMode::Array
431 : MMMEDrawMode::Indexed;
432 u32 count = method_call.argument;
433 while (true) {
434 if (mme_draw.current_mode == MMMEDrawMode::Undefined) {
435 mme_draw.current_mode = expected_mode;
436 mme_draw.current_count = count;
437 mme_draw.instance_count = 1;
438 break;
439 } else {
440 if (mme_draw.current_mode == expected_mode && count == mme_draw.current_count) {
441 mme_draw.instance_count++;
442 break;
443 } else {
444 FlushMMEInlineDraw();
445 }
446 }
447 }
448 }
449 } else {
450 if (mme_draw.current_mode != MMMEDrawMode::Undefined) {
451 FlushMMEInlineDraw();
452 }
453 CallMethod(method_call);
454 }
455}
456
457void Maxwell3D::FlushMMEInlineDraw() {
458 LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
459 regs.vertex_buffer.count);
460 ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
461
462 auto debug_context = system.GetGPUDebugContext();
463
464 if (debug_context) {
465 debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr);
466 }
467
468 // Both instance configuration registers can not be set at the same time.
469 ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont,
470 "Illegal combination of instancing parameters");
471
472 const bool is_indexed = mme_draw.current_mode == MMMEDrawMode::Indexed;
473 rasterizer.AccelerateDrawMultiBatch(is_indexed);
474
475 if (debug_context) {
476 debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
477 }
478
479 // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
480 // the game is trying to draw indexed or direct mode. This needs to be verified on HW still -
481 // it's possible that it is incorrect and that there is some other register used to specify the
482 // drawing mode.
483 if (is_indexed) {
484 regs.index_array.count = 0;
485 } else {
486 regs.vertex_buffer.count = 0;
487 }
488 mme_draw.current_mode = MMMEDrawMode::Undefined;
489 mme_draw.current_count = 0;
490 mme_draw.instance_count = 0;
491}
492
419void Maxwell3D::ProcessMacroUpload(u32 data) { 493void Maxwell3D::ProcessMacroUpload(u32 data) {
420 ASSERT_MSG(regs.macros.upload_address < macro_memory.size(), 494 ASSERT_MSG(regs.macros.upload_address < macro_memory.size(),
421 "upload_address exceeded macro_memory size!"); 495 "upload_address exceeded macro_memory size!");
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index e5ec90717..1547d930e 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -811,8 +811,9 @@ public:
811 INSERT_PADDING_WORDS(0x21); 811 INSERT_PADDING_WORDS(0x21);
812 812
813 u32 vb_element_base; 813 u32 vb_element_base;
814 u32 vb_base_instance;
814 815
815 INSERT_PADDING_WORDS(0x36); 816 INSERT_PADDING_WORDS(0x35);
816 817
817 union { 818 union {
818 BitField<0, 1, u32> c0; 819 BitField<0, 1, u32> c0;
@@ -1238,6 +1239,11 @@ public:
1238 /// Write the value to the register identified by method. 1239 /// Write the value to the register identified by method.
1239 void CallMethod(const GPU::MethodCall& method_call); 1240 void CallMethod(const GPU::MethodCall& method_call);
1240 1241
1242 /// Write the value to the register identified by method.
1243 void CallMethodFromMME(const GPU::MethodCall& method_call);
1244
1245 void FlushMMEInlineDraw();
1246
1241 /// Given a Texture Handle, returns the TSC and TIC entries. 1247 /// Given a Texture Handle, returns the TSC and TIC entries.
1242 Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, 1248 Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle,
1243 std::size_t offset) const; 1249 std::size_t offset) const;
@@ -1263,6 +1269,18 @@ public:
1263 return execute_on; 1269 return execute_on;
1264 } 1270 }
1265 1271
1272 enum class MMMEDrawMode : u32 {
1273 Undefined,
1274 Array,
1275 Indexed,
1276 };
1277
1278 struct MMEDrawState {
1279 MMMEDrawMode current_mode{MMMEDrawMode::Undefined};
1280 u32 current_count;
1281 u32 instance_count;
1282 } mme_draw;
1283
1266private: 1284private:
1267 void InitializeRegisterDefaults(); 1285 void InitializeRegisterDefaults();
1268 1286
@@ -1275,6 +1293,8 @@ private:
1275 /// Start offsets of each macro in macro_memory 1293 /// Start offsets of each macro in macro_memory
1276 std::array<u32, 0x80> macro_positions = {}; 1294 std::array<u32, 0x80> macro_positions = {};
1277 1295
1296 std::array<bool, Regs::NUM_REGS> mme_inline{};
1297
1278 /// Memory for macro code 1298 /// Memory for macro code
1279 MacroMemory macro_memory; 1299 MacroMemory macro_memory;
1280 1300
@@ -1402,6 +1422,7 @@ ASSERT_REG_POSITION(stencil_front_mask, 0x4E7);
1402ASSERT_REG_POSITION(frag_color_clamp, 0x4EA); 1422ASSERT_REG_POSITION(frag_color_clamp, 0x4EA);
1403ASSERT_REG_POSITION(screen_y_control, 0x4EB); 1423ASSERT_REG_POSITION(screen_y_control, 0x4EB);
1404ASSERT_REG_POSITION(vb_element_base, 0x50D); 1424ASSERT_REG_POSITION(vb_element_base, 0x50D);
1425ASSERT_REG_POSITION(vb_base_instance, 0x50E);
1405ASSERT_REG_POSITION(clip_distance_enabled, 0x544); 1426ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
1406ASSERT_REG_POSITION(point_size, 0x546); 1427ASSERT_REG_POSITION(point_size, 0x546);
1407ASSERT_REG_POSITION(zeta_enable, 0x54E); 1428ASSERT_REG_POSITION(zeta_enable, 0x54E);
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index 62afc0d11..dbaeac6db 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -257,7 +257,7 @@ void MacroInterpreter::SetMethodAddress(u32 address) {
257} 257}
258 258
259void MacroInterpreter::Send(u32 value) { 259void MacroInterpreter::Send(u32 value) {
260 maxwell3d.CallMethod({method_address.address, value}); 260 maxwell3d.CallMethodFromMME({method_address.address, value});
261 // Increment the method address by the method increment. 261 // Increment the method address by the method increment.
262 method_address.address.Assign(method_address.address.Value() + 262 method_address.address.Assign(method_address.address.Value() +
263 method_address.increment.Value()); 263 method_address.increment.Value());
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 6b3f2d50a..a37b84b8c 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -31,6 +31,9 @@ public:
31 /// Draw the current batch of vertex arrays 31 /// Draw the current batch of vertex arrays
32 virtual void DrawArrays() = 0; 32 virtual void DrawArrays() = 0;
33 33
34 /// Draw the current batch of vertex arrays
35 virtual void DrawMultiArrays() = 0;
36
34 /// Clear the current framebuffer 37 /// Clear the current framebuffer
35 virtual void Clear() = 0; 38 virtual void Clear() = 0;
36 39
@@ -73,6 +76,10 @@ public:
73 return false; 76 return false;
74 } 77 }
75 78
79 virtual bool AccelerateDrawMultiBatch(bool is_indexed) {
80 return false;
81 }
82
76 /// Increase/decrease the number of object in pages touching the specified region 83 /// Increase/decrease the number of object in pages touching the specified region
77 virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {} 84 virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
78 85
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 4dd08bccb..5df7f3f56 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -405,6 +405,12 @@ bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
405 return true; 405 return true;
406} 406}
407 407
408bool RasterizerOpenGL::AccelerateDrawMultiBatch(bool is_indexed) {
409 accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
410 DrawMultiArrays();
411 return true;
412}
413
408template <typename Map, typename Interval> 414template <typename Map, typename Interval>
409static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { 415static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
410 return boost::make_iterator_range(map.equal_range(interval)); 416 return boost::make_iterator_range(map.equal_range(interval));
@@ -688,7 +694,7 @@ void RasterizerOpenGL::Clear() {
688 } 694 }
689} 695}
690 696
691void RasterizerOpenGL::DrawArrays() { 697void RasterizerOpenGL::DrawPrelude() {
692 if (accelerate_draw == AccelDraw::Disabled) 698 if (accelerate_draw == AccelDraw::Disabled)
693 return; 699 return;
694 700
@@ -743,10 +749,7 @@ void RasterizerOpenGL::DrawArrays() {
743 // Upload vertex and index data. 749 // Upload vertex and index data.
744 SetupVertexBuffer(vao); 750 SetupVertexBuffer(vao);
745 SetupVertexInstances(vao); 751 SetupVertexInstances(vao);
746 const GLintptr index_buffer_offset = SetupIndexBuffer(); 752 index_buffer_offset = SetupIndexBuffer();
747
748 // Setup draw parameters. It will automatically choose what glDraw* method to use.
749 const DrawParameters params = SetupDraw(index_buffer_offset);
750 753
751 // Prepare packed bindings. 754 // Prepare packed bindings.
752 bind_ubo_pushbuffer.Setup(0); 755 bind_ubo_pushbuffer.Setup(0);
@@ -754,7 +757,8 @@ void RasterizerOpenGL::DrawArrays() {
754 757
755 // Setup shaders and their used resources. 758 // Setup shaders and their used resources.
756 texture_cache.GuardSamplers(true); 759 texture_cache.GuardSamplers(true);
757 SetupShaders(params.primitive_mode); 760 const auto primitive_mode = MaxwellToGL::PrimitiveTopology(gpu.regs.draw.topology);
761 SetupShaders(primitive_mode);
758 texture_cache.GuardSamplers(false); 762 texture_cache.GuardSamplers(false);
759 763
760 ConfigureFramebuffers(state); 764 ConfigureFramebuffers(state);
@@ -778,11 +782,80 @@ void RasterizerOpenGL::DrawArrays() {
778 if (texture_cache.TextureBarrier()) { 782 if (texture_cache.TextureBarrier()) {
779 glTextureBarrier(); 783 glTextureBarrier();
780 } 784 }
785}
781 786
782 params.DispatchDraw(); 787void RasterizerOpenGL::DrawArrays() {
788 DrawPrelude();
789
790 auto& maxwell3d = system.GPU().Maxwell3D();
791 auto& regs = maxwell3d.regs;
792 auto current_instance = maxwell3d.state.current_instance;
793 auto primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
794 if (accelerate_draw == AccelDraw::Indexed) {
795 auto index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
796 auto count = regs.index_array.count;
797 auto base_vertex = static_cast<GLint>(regs.vb_element_base);
798 const auto index_buffer_ptr = reinterpret_cast<const void*>(index_buffer_offset);
799 if (current_instance > 0) {
800 glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format,
801 index_buffer_ptr, 1, base_vertex,
802 current_instance);
803 } else {
804 glDrawElementsBaseVertex(primitive_mode, count, index_format, index_buffer_ptr,
805 base_vertex);
806 }
807 } else {
808 auto count = regs.vertex_buffer.count;
809 auto vertex_first = regs.vertex_buffer.first;
810 if (current_instance > 0) {
811 glDrawArraysInstancedBaseInstance(primitive_mode, vertex_first, count, 1,
812 current_instance);
813 } else {
814 glDrawArrays(primitive_mode, vertex_first, count);
815 }
816 }
817
818 accelerate_draw = AccelDraw::Disabled;
819 maxwell3d.dirty.memory_general = false;
820}
821
822#pragma optimize("", off)
823
824void RasterizerOpenGL::DrawMultiArrays() {
825 DrawPrelude();
826
827 auto& maxwell3d = system.GPU().Maxwell3D();
828 auto& regs = maxwell3d.regs;
829 auto& draw_setup = maxwell3d.mme_draw;
830 auto num_instances = draw_setup.instance_count;
831 auto base_instance = static_cast<GLint>(regs.vb_base_instance);
832 auto primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
833 if (draw_setup.current_mode == Tegra::Engines::Maxwell3D::MMMEDrawMode::Indexed) {
834 auto index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
835 auto count = regs.index_array.count;
836 auto base_vertex = static_cast<GLint>(regs.vb_element_base);
837 const auto index_buffer_ptr = reinterpret_cast<const void*>(index_buffer_offset);
838 if (num_instances > 1) {
839 glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format,
840 index_buffer_ptr, num_instances,
841 base_vertex, base_instance);
842 } else {
843 glDrawElementsBaseVertex(primitive_mode, count, index_format, index_buffer_ptr,
844 base_vertex);
845 }
846 } else {
847 auto count = regs.vertex_buffer.count;
848 auto vertex_first = regs.vertex_buffer.first;
849 if (num_instances > 1) {
850 glDrawArraysInstancedBaseInstance(primitive_mode, vertex_first, count, num_instances,
851 base_instance);
852 } else {
853 glDrawArrays(primitive_mode, vertex_first, count);
854 }
855 }
783 856
784 accelerate_draw = AccelDraw::Disabled; 857 accelerate_draw = AccelDraw::Disabled;
785 gpu.dirty.memory_general = false; 858 maxwell3d.dirty.memory_general = false;
786} 859}
787 860
788void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { 861void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index eada752e0..63a914ff9 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -58,6 +58,7 @@ public:
58 ~RasterizerOpenGL() override; 58 ~RasterizerOpenGL() override;
59 59
60 void DrawArrays() override; 60 void DrawArrays() override;
61 void DrawMultiArrays() override;
61 void Clear() override; 62 void Clear() override;
62 void DispatchCompute(GPUVAddr code_addr) override; 63 void DispatchCompute(GPUVAddr code_addr) override;
63 void FlushAll() override; 64 void FlushAll() override;
@@ -72,6 +73,7 @@ public:
72 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 73 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
73 u32 pixel_stride) override; 74 u32 pixel_stride) override;
74 bool AccelerateDrawBatch(bool is_indexed) override; 75 bool AccelerateDrawBatch(bool is_indexed) override;
76 bool AccelerateDrawMultiBatch(bool is_indexed) override;
75 void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; 77 void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
76 void LoadDiskResources(const std::atomic_bool& stop_loading, 78 void LoadDiskResources(const std::atomic_bool& stop_loading,
77 const VideoCore::DiskResourceLoadCallback& callback) override; 79 const VideoCore::DiskResourceLoadCallback& callback) override;
@@ -136,6 +138,8 @@ private:
136 void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, 138 void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
137 std::size_t size); 139 std::size_t size);
138 140
141 void DrawPrelude();
142
139 /// Configures the current textures to use for the draw command. Returns shaders texture buffer 143 /// Configures the current textures to use for the draw command. Returns shaders texture buffer
140 /// usage. 144 /// usage.
141 TextureBufferUsage SetupDrawTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, 145 TextureBufferUsage SetupDrawTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
@@ -252,6 +256,8 @@ private:
252 256
253 DrawParameters SetupDraw(GLintptr index_buffer_offset); 257 DrawParameters SetupDraw(GLintptr index_buffer_offset);
254 258
259 GLintptr index_buffer_offset;
260
255 void SetupShaders(GLenum primitive_mode); 261 void SetupShaders(GLenum primitive_mode);
256 262
257 enum class AccelDraw { Disabled, Arrays, Indexed }; 263 enum class AccelDraw { Disabled, Arrays, Indexed };
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 14834d86a..62e32697e 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -964,7 +964,7 @@ private:
964 switch (element) { 964 switch (element) {
965 case 2: 965 case 2:
966 // Config pack's first value is instance_id. 966 // Config pack's first value is instance_id.
967 return {"config_pack[0]", Type::Uint}; 967 return {"gl_InstanceID", Type::Uint};
968 case 3: 968 case 3:
969 return {"gl_VertexID", Type::Int}; 969 return {"gl_VertexID", Type::Int};
970 } 970 }