summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h 6
-rw-r--r-- src/video_core/dma_pusher.cpp 9
-rw-r--r-- src/video_core/dma_pusher.h 2
-rw-r--r-- src/video_core/engines/draw_manager.cpp 6
-rw-r--r-- src/video_core/engines/draw_manager.h 2
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 37
-rw-r--r-- src/video_core/engines/maxwell_3d.h 21
-rw-r--r-- src/video_core/macro/macro.cpp 7
-rw-r--r-- src/video_core/macro/macro.h 1
-rw-r--r-- src/video_core/macro/macro_hle.cpp 445
-rw-r--r-- src/video_core/macro/macro_hle.h 5
11 files changed, 420 insertions, 121 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 557227b37..98343628c 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -1574,7 +1574,11 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s
1574 if (!is_async) { 1574 if (!is_async) {
1575 return; 1575 return;
1576 } 1576 }
1577 uncommitted_ranges.add(base_interval); 1577 const bool is_high_accuracy =
1578 Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High;
1579 if (is_high_accuracy) {
1580 uncommitted_ranges.add(base_interval);
1581 }
1578} 1582}
1579 1583
1580template <class P> 1584template <class P>
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 13ff64fa3..5ad40abaa 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -94,10 +94,10 @@ void DmaPusher::ProcessCommands(std::span<const CommandHeader> commands) {
94 94
95 if (dma_state.method_count) { 95 if (dma_state.method_count) {
96 // Data word of methods command 96 // Data word of methods command
97 dma_state.dma_word_offset = static_cast<u32>(index * sizeof(u32));
97 if (dma_state.non_incrementing) { 98 if (dma_state.non_incrementing) {
98 const u32 max_write = static_cast<u32>( 99 const u32 max_write = static_cast<u32>(
99 std::min<std::size_t>(index + dma_state.method_count, commands.size()) - index); 100 std::min<std::size_t>(index + dma_state.method_count, commands.size()) - index);
100 dma_state.dma_word_offset = static_cast<u32>(index * sizeof(u32));
101 CallMultiMethod(&command_header.argument, max_write); 101 CallMultiMethod(&command_header.argument, max_write);
102 dma_state.method_count -= max_write; 102 dma_state.method_count -= max_write;
103 dma_state.is_last_call = true; 103 dma_state.is_last_call = true;
@@ -133,6 +133,8 @@ void DmaPusher::ProcessCommands(std::span<const CommandHeader> commands) {
133 case SubmissionMode::Inline: 133 case SubmissionMode::Inline:
134 dma_state.method = command_header.method; 134 dma_state.method = command_header.method;
135 dma_state.subchannel = command_header.subchannel; 135 dma_state.subchannel = command_header.subchannel;
136 dma_state.dma_word_offset = static_cast<u64>(
137 -static_cast<s64>(dma_state.dma_get)); // negate to set address as 0
136 CallMethod(command_header.arg_count); 138 CallMethod(command_header.arg_count);
137 dma_state.non_incrementing = true; 139 dma_state.non_incrementing = true;
138 dma_increment_once = false; 140 dma_increment_once = false;
@@ -165,8 +167,9 @@ void DmaPusher::CallMethod(u32 argument) const {
165 dma_state.method_count, 167 dma_state.method_count,
166 }); 168 });
167 } else { 169 } else {
168 subchannels[dma_state.subchannel]->CallMethod(dma_state.method, argument, 170 auto subchannel = subchannels[dma_state.subchannel];
169 dma_state.is_last_call); 171 subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset;
172 subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call);
170 } 173 }
171} 174}
172 175
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index da7728ded..1cdb690ed 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -157,7 +157,7 @@ private:
157 u32 method_count; ///< Current method count 157 u32 method_count; ///< Current method count
158 u32 length_pending; ///< Large NI command length pending 158 u32 length_pending; ///< Large NI command length pending
159 GPUVAddr dma_get; ///< Currently read segment 159 GPUVAddr dma_get; ///< Currently read segment
160 u32 dma_word_offset; ///< Current word ofset from address 160 u64 dma_word_offset; ///< Current word ofset from address
161 bool non_incrementing; ///< Current command's NI flag 161 bool non_incrementing; ///< Current command's NI flag
162 bool is_last_call; 162 bool is_last_call;
163 }; 163 };
diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp
index c60f32aad..183d5403c 100644
--- a/src/video_core/engines/draw_manager.cpp
+++ b/src/video_core/engines/draw_manager.cpp
@@ -91,6 +91,12 @@ void DrawManager::DrawIndex(PrimitiveTopology topology, u32 index_first, u32 ind
91 ProcessDraw(true, num_instances); 91 ProcessDraw(true, num_instances);
92} 92}
93 93
94void DrawManager::DrawArrayIndirect(PrimitiveTopology topology) {
95 draw_state.topology = topology;
96
97 ProcessDrawIndirect(true);
98}
99
94void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count) { 100void DrawManager::DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count) {
95 const auto& regs{maxwell3d->regs}; 101 const auto& regs{maxwell3d->regs};
96 draw_state.topology = topology; 102 draw_state.topology = topology;
diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h
index 437990162..49a4fca48 100644
--- a/src/video_core/engines/draw_manager.h
+++ b/src/video_core/engines/draw_manager.h
@@ -56,6 +56,8 @@ public:
56 void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index, 56 void DrawIndex(PrimitiveTopology topology, u32 index_first, u32 index_count, u32 base_index,
57 u32 base_instance, u32 num_instances); 57 u32 base_instance, u32 num_instances);
58 58
59 void DrawArrayIndirect(PrimitiveTopology topology);
60
59 void DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count); 61 void DrawIndexedIndirect(PrimitiveTopology topology, u32 index_first, u32 index_count);
60 62
61 const State& GetDrawState() const { 63 const State& GetDrawState() const {
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index cd6274a9b..b998a8e69 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -133,15 +133,52 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool
133 for (size_t i = 0; i < amount; i++) { 133 for (size_t i = 0; i < amount; i++) {
134 macro_addresses.push_back(current_dma_segment + i * sizeof(u32)); 134 macro_addresses.push_back(current_dma_segment + i * sizeof(u32));
135 } 135 }
136 macro_segments.emplace_back(current_dma_segment, amount);
136 137
137 // Call the macro when there are no more parameters in the command buffer 138 // Call the macro when there are no more parameters in the command buffer
138 if (is_last_call) { 139 if (is_last_call) {
139 CallMacroMethod(executing_macro, macro_params); 140 CallMacroMethod(executing_macro, macro_params);
140 macro_params.clear(); 141 macro_params.clear();
141 macro_addresses.clear(); 142 macro_addresses.clear();
143 macro_segments.clear();
142 } 144 }
143} 145}
144 146
147void Maxwell3D::RefreshParameters() {
148 size_t current_index = 0;
149 for (auto& segment : macro_segments) {
150 if (segment.first == 0) {
151 current_index += segment.second;
152 continue;
153 }
154 memory_manager.ReadBlock(segment.first, &macro_params[current_index],
155 sizeof(u32) * segment.second);
156 current_index += segment.second;
157 }
158}
159
160u32 Maxwell3D::GetMaxCurrentVertices() {
161 u32 num_vertices = 0;
162 for (size_t index = 0; index < Regs::NumVertexArrays; ++index) {
163 const auto& array = regs.vertex_streams[index];
164 if (array.enable == 0) {
165 continue;
166 }
167 const auto& attribute = regs.vertex_attrib_format[index];
168 if (attribute.constant) {
169 num_vertices = std::max(num_vertices, 1U);
170 continue;
171 }
172 const auto& limit = regs.vertex_stream_limits[index];
173 const GPUVAddr gpu_addr_begin = array.Address();
174 const GPUVAddr gpu_addr_end = limit.Address() + 1;
175 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
176 num_vertices = std::max(
177 num_vertices, address_size / std::max(attribute.SizeInBytes(), array.stride.Value()));
178 }
179 return num_vertices;
180}
181
145u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { 182u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
146 // Keep track of the register value in shadow_state when requested. 183 // Keep track of the register value in shadow_state when requested.
147 const auto control = shadow_state.shadow_ram_control; 184 const auto control = shadow_state.shadow_ram_control;
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index ac5e87563..e2256594d 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -3068,10 +3068,14 @@ public:
3068 friend class DrawManager; 3068 friend class DrawManager;
3069 3069
3070 std::vector<u8> inline_index_draw_indexes; 3070 std::vector<u8> inline_index_draw_indexes;
3071 std::vector<GPUVAddr> macro_addresses;
3072 3071
3073 Core::System& system; 3072 GPUVAddr getMacroAddress(size_t index) const {
3074 MemoryManager& memory_manager; 3073 return macro_addresses[index];
3074 }
3075
3076 void RefreshParameters();
3077
3078 u32 GetMaxCurrentVertices();
3075 3079
3076 /// Handles a write to the CLEAR_BUFFERS register. 3080 /// Handles a write to the CLEAR_BUFFERS register.
3077 void ProcessClearBuffers(u32 layer_count); 3081 void ProcessClearBuffers(u32 layer_count);
@@ -3135,6 +3139,9 @@ private:
3135 /// Returns a query's value or an empty object if the value will be deferred through a cache. 3139 /// Returns a query's value or an empty object if the value will be deferred through a cache.
3136 std::optional<u64> GetQueryResult(); 3140 std::optional<u64> GetQueryResult();
3137 3141
3142 Core::System& system;
3143 MemoryManager& memory_manager;
3144
3138 VideoCore::RasterizerInterface* rasterizer = nullptr; 3145 VideoCore::RasterizerInterface* rasterizer = nullptr;
3139 3146
3140 /// Start offsets of each macro in macro_memory 3147 /// Start offsets of each macro in macro_memory
@@ -3151,6 +3158,14 @@ private:
3151 Upload::State upload_state; 3158 Upload::State upload_state;
3152 3159
3153 bool execute_on{true}; 3160 bool execute_on{true};
3161
3162 std::array<bool, Regs::NUM_REGS> draw_command{};
3163 std::vector<u32> deferred_draw_method;
3164 enum class DrawMode : u32 { General = 0, Instance, InlineIndex };
3165 DrawMode draw_mode{DrawMode::General};
3166 bool draw_indexed{};
3167 std::vector<std::pair<GPUVAddr, size_t>> macro_segments;
3168 std::vector<GPUVAddr> macro_addresses;
3154}; 3169};
3155 3170
3156#define ASSERT_REG_POSITION(field_name, position) \ 3171#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
index 505d81c1e..01dd25f95 100644
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -13,6 +13,7 @@
13#include "common/fs/fs.h" 13#include "common/fs/fs.h"
14#include "common/fs/path_util.h" 14#include "common/fs/path_util.h"
15#include "common/settings.h" 15#include "common/settings.h"
16#include "video_core/engines/maxwell_3d.h"
16#include "video_core/macro/macro.h" 17#include "video_core/macro/macro.h"
17#include "video_core/macro/macro_hle.h" 18#include "video_core/macro/macro_hle.h"
18#include "video_core/macro/macro_interpreter.h" 19#include "video_core/macro/macro_interpreter.h"
@@ -40,8 +41,8 @@ static void Dump(u64 hash, std::span<const u32> code) {
40 macro_file.write(reinterpret_cast<const char*>(code.data()), code.size_bytes()); 41 macro_file.write(reinterpret_cast<const char*>(code.data()), code.size_bytes());
41} 42}
42 43
43MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d) 44MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d_)
44 : hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {} 45 : hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d_)}, maxwell3d{maxwell3d_} {}
45 46
46MacroEngine::~MacroEngine() = default; 47MacroEngine::~MacroEngine() = default;
47 48
@@ -61,6 +62,7 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
61 if (cache_info.has_hle_program) { 62 if (cache_info.has_hle_program) {
62 cache_info.hle_program->Execute(parameters, method); 63 cache_info.hle_program->Execute(parameters, method);
63 } else { 64 } else {
65 maxwell3d.RefreshParameters();
64 cache_info.lle_program->Execute(parameters, method); 66 cache_info.lle_program->Execute(parameters, method);
65 } 67 }
66 } else { 68 } else {
@@ -106,6 +108,7 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
106 cache_info.hle_program = std::move(hle_program); 108 cache_info.hle_program = std::move(hle_program);
107 cache_info.hle_program->Execute(parameters, method); 109 cache_info.hle_program->Execute(parameters, method);
108 } else { 110 } else {
111 maxwell3d.RefreshParameters();
109 cache_info.lle_program->Execute(parameters, method); 112 cache_info.lle_program->Execute(parameters, method);
110 } 113 }
111 } 114 }
diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h
index 07d97ba39..737ced9a4 100644
--- a/src/video_core/macro/macro.h
+++ b/src/video_core/macro/macro.h
@@ -137,6 +137,7 @@ private:
137 std::unordered_map<u32, CacheInfo> macro_cache; 137 std::unordered_map<u32, CacheInfo> macro_cache;
138 std::unordered_map<u32, std::vector<u32>> uploaded_macro_code; 138 std::unordered_map<u32, std::vector<u32>> uploaded_macro_code;
139 std::unique_ptr<HLEMacro> hle_macros; 139 std::unique_ptr<HLEMacro> hle_macros;
140 Engines::Maxwell3D& maxwell3d;
140}; 141};
141 142
142std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d); 143std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d);
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index da988cc0d..79fab96e1 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -3,6 +3,7 @@
3 3
4#include <array> 4#include <array>
5#include <vector> 5#include <vector>
6#include "common/assert.h"
6#include "common/scope_exit.h" 7#include "common/scope_exit.h"
7#include "video_core/dirty_flags.h" 8#include "video_core/dirty_flags.h"
8#include "video_core/engines/draw_manager.h" 9#include "video_core/engines/draw_manager.h"
@@ -15,143 +16,365 @@
15namespace Tegra { 16namespace Tegra {
16namespace { 17namespace {
17 18
18using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters); 19bool IsTopologySafe(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) {
19 20 switch (topology) {
20// HLE'd functions 21 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points:
21void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { 22 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines:
22 const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); 23 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineLoop:
23 maxwell3d.draw_manager->DrawIndex( 24 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip:
24 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & 0x3ffffff), 25 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles:
25 parameters[4], parameters[1], parameters[3], parameters[5], instance_count); 26 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
27 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
28 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
29 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
30 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
31 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
32 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Patches:
33 return true;
34 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Quads:
35 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::QuadStrip:
36 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Polygon:
37 default:
38 return false;
39 }
26} 40}
27 41
28void HLE_DrawArraysIndirect(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { 42class HLEMacroImpl : public CachedMacro {
29 const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); 43public:
30 maxwell3d.draw_manager->DrawArray( 44 explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {}
31 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
32 parameters[3], parameters[1], parameters[4], instance_count);
33}
34 45
35void HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { 46protected:
36 const u32 element_base = parameters[4]; 47 void advanceCheck() {
37 const u32 base_instance = parameters[5]; 48 current_value = (current_value + 1) % fibonacci_post;
38 maxwell3d.regs.vertex_id_base = element_base; 49 check_limit = current_value == 0;
39 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; 50 if (check_limit) {
40 maxwell3d.CallMethod(0x8e3, 0x640, true); 51 const u32 new_fibonacci = fibonacci_pre + fibonacci_post;
41 maxwell3d.CallMethod(0x8e4, element_base, true); 52 fibonacci_pre = fibonacci_post;
42 maxwell3d.CallMethod(0x8e5, base_instance, true); 53 fibonacci_post = new_fibonacci;
43 54 }
44 auto& params = maxwell3d.draw_manager->GetIndirectParams(); 55 }
45 params.is_indexed = true; 56
46 params.include_count = false; 57 Engines::Maxwell3D& maxwell3d;
47 params.count_start_address = 0; 58 u32 fibonacci_pre{89};
48 params.indirect_start_address = maxwell3d.macro_addresses[1]; 59 u32 fibonacci_post{144};
49 params.buffer_size = 5 * sizeof(u32); 60 u32 current_value{fibonacci_post - 1};
50 params.max_draw_counts = 1; 61 bool check_limit{};
51 params.stride = 0; 62};
52 63
53 maxwell3d.draw_manager->DrawIndexedIndirect( 64class HLE_771BB18C62444DA0 final : public HLEMacroImpl {
54 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), 0, 65public:
55 1U << 18); 66 explicit HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
56
57 maxwell3d.regs.vertex_id_base = 0x0;
58 maxwell3d.CallMethod(0x8e3, 0x640, true);
59 maxwell3d.CallMethod(0x8e4, 0x0, true);
60 maxwell3d.CallMethod(0x8e5, 0x0, true);
61}
62 67
63// Multidraw Indixed Indirect 68 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
64void HLE_MultiDrawIndexedIndirect(Engines::Maxwell3D& maxwell3d, 69 maxwell3d.RefreshParameters();
65 const std::vector<u32>& parameters) { 70 const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B);
66 const u32 start_indirect = parameters[0]; 71 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
67 const u32 end_indirect = parameters[1]; 72 maxwell3d.draw_manager->DrawIndex(
68 if (start_indirect >= end_indirect) { 73 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] &
69 // Nothing to do. 74 0x3ffffff),
70 return; 75 parameters[4], parameters[1], parameters[3], parameters[5], instance_count);
71 } 76 }
72 const auto topology = 77};
73 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]); 78
74 const u32 padding = parameters[3]; // padding is in words 79class HLE_DrawArraysIndirect final : public HLEMacroImpl {
75 80public:
76 // size of each indirect segment 81 explicit HLE_DrawArraysIndirect(Engines::Maxwell3D& maxwell3d_, bool extended_ = false)
77 const u32 indirect_words = 5 + padding; 82 : HLEMacroImpl(maxwell3d_), extended(extended_) {}
78 const u32 stride = indirect_words * sizeof(u32); 83
79 const std::size_t draw_count = end_indirect - start_indirect; 84 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
80 u32 lowest_first = std::numeric_limits<u32>::max(); 85 auto topology =
81 u32 highest_limit = std::numeric_limits<u32>::min(); 86 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
82 for (std::size_t index = 0; index < draw_count; index++) { 87 if (!IsTopologySafe(topology)) {
83 const std::size_t base = index * indirect_words + 5; 88 Fallback(parameters);
84 const u32 count = parameters[base]; 89 return;
85 const u32 first_index = parameters[base + 2]; 90 }
86 lowest_first = std::min(lowest_first, first_index); 91
87 highest_limit = std::max(highest_limit, first_index + count); 92 auto& params = maxwell3d.draw_manager->GetIndirectParams();
93 params.is_indexed = false;
94 params.include_count = false;
95 params.count_start_address = 0;
96 params.indirect_start_address = maxwell3d.getMacroAddress(1);
97 params.buffer_size = 4 * sizeof(u32);
98 params.max_draw_counts = 1;
99 params.stride = 0;
100
101 if (extended) {
102 maxwell3d.CallMethod(0x8e3, 0x640, true);
103 maxwell3d.CallMethod(0x8e4, parameters[4], true);
104 }
105
106 maxwell3d.draw_manager->DrawArrayIndirect(topology);
107
108 if (extended) {
109 maxwell3d.CallMethod(0x8e3, 0x640, true);
110 maxwell3d.CallMethod(0x8e4, 0, true);
111 }
112 maxwell3d.regs.vertex_buffer.first = 0;
113 maxwell3d.regs.vertex_buffer.count = 0;
88 } 114 }
89 115
90 const u32 base_vertex = parameters[8]; 116private:
91 const u32 base_instance = parameters[9]; 117 void Fallback(const std::vector<u32>& parameters) {
92 maxwell3d.regs.vertex_id_base = base_vertex; 118 SCOPE_EXIT({
93 maxwell3d.CallMethod(0x8e3, 0x640, true); 119 if (extended) {
94 maxwell3d.CallMethod(0x8e4, base_vertex, true); 120 maxwell3d.CallMethod(0x8e3, 0x640, true);
95 maxwell3d.CallMethod(0x8e5, base_instance, true); 121 maxwell3d.CallMethod(0x8e4, 0, true);
96 auto& params = maxwell3d.draw_manager->GetIndirectParams(); 122 }
97 params.is_indexed = true; 123 });
98 params.include_count = true; 124 maxwell3d.RefreshParameters();
99 params.count_start_address = maxwell3d.macro_addresses[4]; 125 const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
100 params.indirect_start_address = maxwell3d.macro_addresses[5];
101 params.buffer_size = stride * draw_count;
102 params.max_draw_counts = draw_count;
103 params.stride = stride;
104 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
105 maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, highest_limit);
106}
107 126
108// Multi-layer Clear 127 const u32 vertex_first = parameters[3];
109void HLE_MultiLayerClear(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { 128 const u32 vertex_count = parameters[1];
110 ASSERT(parameters.size() == 1);
111 129
112 const Engines::Maxwell3D::Regs::ClearSurface clear_params{parameters[0]}; 130 if (maxwell3d.GetMaxCurrentVertices() < vertex_first + vertex_count) {
113 const u32 rt_index = clear_params.RT; 131 ASSERT_MSG(false, "Faulty draw!");
114 const u32 num_layers = maxwell3d.regs.rt[rt_index].depth; 132 return;
115 ASSERT(clear_params.layer == 0); 133 }
116 134
117 maxwell3d.regs.clear_surface.raw = clear_params.raw; 135 const u32 base_instance = parameters[4];
118 maxwell3d.draw_manager->Clear(num_layers); 136 if (extended) {
119} 137 maxwell3d.CallMethod(0x8e3, 0x640, true);
138 maxwell3d.CallMethod(0x8e4, base_instance, true);
139 }
140
141 maxwell3d.draw_manager->DrawArray(
142 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
143 vertex_first, vertex_count, base_instance, instance_count);
144 }
120 145
121constexpr std::array<std::pair<u64, HLEFunction>, 5> hle_funcs{{ 146 bool extended;
122 {0x771BB18C62444DA0, &HLE_771BB18C62444DA0}, 147};
123 {0x0D61FC9FAAC9FCAD, &HLE_DrawArraysIndirect},
124 {0x0217920100488FF7, &HLE_DrawIndexedIndirect},
125 {0x3F5E74B9C9A50164, &HLE_MultiDrawIndexedIndirect},
126 {0xEAD26C3E2109B06B, &HLE_MultiLayerClear},
127}};
128 148
129class HLEMacroImpl final : public CachedMacro { 149class HLE_DrawIndexedIndirect final : public HLEMacroImpl {
130public: 150public:
131 explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_) 151 explicit HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
132 : maxwell3d{maxwell3d_}, func{func_} {}
133 152
134 void Execute(const std::vector<u32>& parameters, u32 method) override { 153 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
135 func(maxwell3d, parameters); 154 auto topology =
155 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
156 if (!IsTopologySafe(topology)) {
157 Fallback(parameters);
158 return;
159 }
160
161 advanceCheck();
162 if (check_limit) {
163 maxwell3d.RefreshParameters();
164 minimum_limit = std::max(parameters[3], minimum_limit);
165 }
166
167 const u32 base_vertex = parameters[8];
168 const u32 base_instance = parameters[9];
169 maxwell3d.regs.vertex_id_base = base_vertex;
170 maxwell3d.CallMethod(0x8e3, 0x640, true);
171 maxwell3d.CallMethod(0x8e4, base_vertex, true);
172 maxwell3d.CallMethod(0x8e5, base_instance, true);
173 auto& params = maxwell3d.draw_manager->GetIndirectParams();
174 params.is_indexed = true;
175 params.include_count = false;
176 params.count_start_address = 0;
177 params.indirect_start_address = maxwell3d.getMacroAddress(1);
178 params.buffer_size = 5 * sizeof(u32);
179 params.max_draw_counts = 1;
180 params.stride = 0;
181 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
182 maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, minimum_limit);
183 maxwell3d.CallMethod(0x8e3, 0x640, true);
184 maxwell3d.CallMethod(0x8e4, 0x0, true);
185 maxwell3d.CallMethod(0x8e5, 0x0, true);
136 } 186 }
137 187
138private: 188private:
139 Engines::Maxwell3D& maxwell3d; 189 void Fallback(const std::vector<u32>& parameters) {
140 HLEFunction func; 190 maxwell3d.RefreshParameters();
191 const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
192 const u32 element_base = parameters[4];
193 const u32 base_instance = parameters[5];
194 maxwell3d.regs.vertex_id_base = element_base;
195 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
196 maxwell3d.CallMethod(0x8e3, 0x640, true);
197 maxwell3d.CallMethod(0x8e4, element_base, true);
198 maxwell3d.CallMethod(0x8e5, base_instance, true);
199
200 maxwell3d.draw_manager->DrawIndex(
201 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
202 parameters[3], parameters[1], element_base, base_instance, instance_count);
203
204 maxwell3d.regs.vertex_id_base = 0x0;
205 maxwell3d.CallMethod(0x8e3, 0x640, true);
206 maxwell3d.CallMethod(0x8e4, 0x0, true);
207 maxwell3d.CallMethod(0x8e5, 0x0, true);
208 }
209
210 u32 minimum_limit{1 << 18};
211};
212
213class HLE_MultiLayerClear final : public HLEMacroImpl {
214public:
215 explicit HLE_MultiLayerClear(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
216
217 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
218 maxwell3d.RefreshParameters();
219 ASSERT(parameters.size() == 1);
220
221 const Engines::Maxwell3D::Regs::ClearSurface clear_params{parameters[0]};
222 const u32 rt_index = clear_params.RT;
223 const u32 num_layers = maxwell3d.regs.rt[rt_index].depth;
224 ASSERT(clear_params.layer == 0);
225
226 maxwell3d.regs.clear_surface.raw = clear_params.raw;
227 maxwell3d.draw_manager->Clear(num_layers);
228 }
229};
230
231class HLE_MultiDrawIndexedIndirectCount final : public HLEMacroImpl {
232public:
233 explicit HLE_MultiDrawIndexedIndirectCount(Engines::Maxwell3D& maxwell3d_)
234 : HLEMacroImpl(maxwell3d_) {}
235
236 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
237 const auto topology =
238 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]);
239 if (!IsTopologySafe(topology)) {
240 Fallback(parameters);
241 return;
242 }
243
244 advanceCheck();
245 if (check_limit) {
246 maxwell3d.RefreshParameters();
247 }
248 const u32 start_indirect = parameters[0];
249 const u32 end_indirect = parameters[1];
250 if (start_indirect >= end_indirect) {
251 // Nothing to do.
252 return;
253 }
254
255 maxwell3d.regs.draw.topology.Assign(topology);
256 const u32 padding = parameters[3]; // padding is in words
257
258 // size of each indirect segment
259 const u32 indirect_words = 5 + padding;
260 const u32 stride = indirect_words * sizeof(u32);
261 const std::size_t draw_count = end_indirect - start_indirect;
262 u32 lowest_first = std::numeric_limits<u32>::max();
263 u32 highest_limit = std::numeric_limits<u32>::min();
264 for (std::size_t index = 0; index < draw_count; index++) {
265 const std::size_t base = index * indirect_words + 5;
266 const u32 count = parameters[base];
267 const u32 first_index = parameters[base + 2];
268 lowest_first = std::min(lowest_first, first_index);
269 highest_limit = std::max(highest_limit, first_index + count);
270 }
271 if (check_limit) {
272 minimum_limit = std::max(highest_limit, minimum_limit);
273 }
274
275 maxwell3d.regs.index_buffer.first = 0;
276 maxwell3d.regs.index_buffer.count = std::max(highest_limit, minimum_limit);
277 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
278 auto& params = maxwell3d.draw_manager->GetIndirectParams();
279 params.is_indexed = true;
280 params.include_count = true;
281 params.count_start_address = maxwell3d.getMacroAddress(4);
282 params.indirect_start_address = maxwell3d.getMacroAddress(5);
283 params.buffer_size = stride * draw_count;
284 params.max_draw_counts = draw_count;
285 params.stride = stride;
286 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
287 maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, highest_limit);
288 }
289
290private:
291 void Fallback(const std::vector<u32>& parameters) {
292 SCOPE_EXIT({
293 // Clean everything.
294 // Clean everything.
295 maxwell3d.regs.vertex_id_base = 0x0;
296 maxwell3d.CallMethod(0x8e3, 0x640, true);
297 maxwell3d.CallMethod(0x8e4, 0x0, true);
298 maxwell3d.CallMethod(0x8e5, 0x0, true);
299 });
300 maxwell3d.RefreshParameters();
301 const u32 start_indirect = parameters[0];
302 const u32 end_indirect = parameters[1];
303 if (start_indirect >= end_indirect) {
304 // Nothing to do.
305 return;
306 }
307 const auto topology =
308 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]);
309 maxwell3d.regs.draw.topology.Assign(topology);
310 const u32 padding = parameters[3];
311 const std::size_t max_draws = parameters[4];
312
313 const u32 indirect_words = 5 + padding;
314 const std::size_t first_draw = start_indirect;
315 const std::size_t effective_draws = end_indirect - start_indirect;
316 const std::size_t last_draw = start_indirect + std::min(effective_draws, max_draws);
317
318 for (std::size_t index = first_draw; index < last_draw; index++) {
319 const std::size_t base = index * indirect_words + 5;
320 const u32 base_vertex = parameters[base + 3];
321 const u32 base_instance = parameters[base + 4];
322 maxwell3d.regs.vertex_id_base = base_vertex;
323 maxwell3d.CallMethod(0x8e3, 0x640, true);
324 maxwell3d.CallMethod(0x8e4, base_vertex, true);
325 maxwell3d.CallMethod(0x8e5, base_instance, true);
326 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
327 maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base],
328 base_vertex, base_instance, parameters[base + 1]);
329 }
330 }
331
332 u32 minimum_limit{1 << 12};
141}; 333};
142 334
143} // Anonymous namespace 335} // Anonymous namespace
144 336
145HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} 337HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
338 builders.emplace(0x771BB18C62444DA0ULL,
339 std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
340 [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> {
341 return std::make_unique<HLE_771BB18C62444DA0>(maxwell3d);
342 }));
343 builders.emplace(0x0D61FC9FAAC9FCADULL,
344 std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
345 [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> {
346 return std::make_unique<HLE_DrawArraysIndirect>(maxwell3d);
347 }));
348 builders.emplace(0x8A4D173EB99A8603ULL,
349 std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
350 [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> {
351 return std::make_unique<HLE_DrawArraysIndirect>(maxwell3d, true);
352 }));
353 builders.emplace(0x0217920100488FF7ULL,
354 std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
355 [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> {
356 return std::make_unique<HLE_DrawIndexedIndirect>(maxwell3d);
357 }));
358 builders.emplace(0x3F5E74B9C9A50164ULL,
359 std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
360 [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> {
361 return std::make_unique<HLE_MultiDrawIndexedIndirectCount>(maxwell3d);
362 }));
363 builders.emplace(0xEAD26C3E2109B06BULL,
364 std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
365 [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> {
366 return std::make_unique<HLE_MultiLayerClear>(maxwell3d);
367 }));
368}
369
146HLEMacro::~HLEMacro() = default; 370HLEMacro::~HLEMacro() = default;
147 371
148std::unique_ptr<CachedMacro> HLEMacro::GetHLEProgram(u64 hash) const { 372std::unique_ptr<CachedMacro> HLEMacro::GetHLEProgram(u64 hash) const {
149 const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(), 373 const auto it = builders.find(hash);
150 [hash](const auto& pair) { return pair.first == hash; }); 374 if (it == builders.end()) {
151 if (it == hle_funcs.end()) {
152 return nullptr; 375 return nullptr;
153 } 376 }
154 return std::make_unique<HLEMacroImpl>(maxwell3d, it->second); 377 return it->second(maxwell3d);
155} 378}
156 379
157} // namespace Tegra 380} // namespace Tegra
diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h
index 625332c9d..33f92fab1 100644
--- a/src/video_core/macro/macro_hle.h
+++ b/src/video_core/macro/macro_hle.h
@@ -3,7 +3,10 @@
3 3
4#pragma once 4#pragma once
5 5
6#include <functional>
6#include <memory> 7#include <memory>
8#include <unordered_map>
9
7#include "common/common_types.h" 10#include "common/common_types.h"
8 11
9namespace Tegra { 12namespace Tegra {
@@ -23,6 +26,8 @@ public:
23 26
24private: 27private:
25 Engines::Maxwell3D& maxwell3d; 28 Engines::Maxwell3D& maxwell3d;
29 std::unordered_map<u64, std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>>
30 builders;
26}; 31};
27 32
28} // namespace Tegra 33} // namespace Tegra