summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h113
-rw-r--r--src/video_core/dma_pusher.cpp17
-rw-r--r--src/video_core/engines/engine_interface.h1
-rw-r--r--src/video_core/engines/maxwell_3d.cpp26
-rw-r--r--src/video_core/engines/maxwell_3d.h23
-rw-r--r--src/video_core/macro/macro.cpp5
-rw-r--r--src/video_core/macro/macro_hle.cpp122
7 files changed, 179 insertions, 128 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 98343628c..f86edaa3e 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -170,11 +170,6 @@ public:
170 void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, 170 void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format,
171 bool is_written, bool is_image); 171 bool is_written, bool is_image);
172 172
173 [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size,
174 bool synchronize = true,
175 bool mark_as_written = false,
176 bool discard_downloads = false);
177
178 void FlushCachedWrites(); 173 void FlushCachedWrites();
179 174
180 /// Return true when there are uncommitted buffers to be downloaded 175 /// Return true when there are uncommitted buffers to be downloaded
@@ -354,8 +349,6 @@ private:
354 349
355 bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); 350 bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
356 351
357 bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size);
358
359 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, 352 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
360 std::span<BufferCopy> copies); 353 std::span<BufferCopy> copies);
361 354
@@ -442,7 +435,6 @@ private:
442 435
443 std::vector<BufferId> cached_write_buffer_ids; 436 std::vector<BufferId> cached_write_buffer_ids;
444 437
445 IntervalSet discarded_ranges;
446 IntervalSet uncommitted_ranges; 438 IntervalSet uncommitted_ranges;
447 IntervalSet common_ranges; 439 IntervalSet common_ranges;
448 std::deque<IntervalSet> committed_ranges; 440 std::deque<IntervalSet> committed_ranges;
@@ -600,17 +592,13 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
600 }}; 592 }};
601 593
602 boost::container::small_vector<IntervalType, 4> tmp_intervals; 594 boost::container::small_vector<IntervalType, 4> tmp_intervals;
603 const bool is_high_accuracy =
604 Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High;
605 auto mirror = [&](VAddr base_address, VAddr base_address_end) { 595 auto mirror = [&](VAddr base_address, VAddr base_address_end) {
606 const u64 size = base_address_end - base_address; 596 const u64 size = base_address_end - base_address;
607 const VAddr diff = base_address - *cpu_src_address; 597 const VAddr diff = base_address - *cpu_src_address;
608 const VAddr new_base_address = *cpu_dest_address + diff; 598 const VAddr new_base_address = *cpu_dest_address + diff;
609 const IntervalType add_interval{new_base_address, new_base_address + size}; 599 const IntervalType add_interval{new_base_address, new_base_address + size};
600 uncommitted_ranges.add(add_interval);
610 tmp_intervals.push_back(add_interval); 601 tmp_intervals.push_back(add_interval);
611 if (is_high_accuracy) {
612 uncommitted_ranges.add(add_interval);
613 }
614 }; 602 };
615 ForEachWrittenRange(*cpu_src_address, amount, mirror); 603 ForEachWrittenRange(*cpu_src_address, amount, mirror);
616 // This subtraction in this order is important for overlapping copies. 604 // This subtraction in this order is important for overlapping copies.
@@ -822,32 +810,6 @@ void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_add
822} 810}
823 811
824template <class P> 812template <class P>
825std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size,
826 bool synchronize,
827 bool mark_as_written,
828 bool discard_downloads) {
829 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
830 if (!cpu_addr) {
831 return {&slot_buffers[NULL_BUFFER_ID], 0};
832 }
833 const BufferId buffer_id = FindBuffer(*cpu_addr, size);
834 Buffer& buffer = slot_buffers[buffer_id];
835 if (synchronize) {
836 // SynchronizeBuffer(buffer, *cpu_addr, size);
837 SynchronizeBufferNoModified(buffer, *cpu_addr, size);
838 }
839 if (mark_as_written) {
840 MarkWrittenBuffer(buffer_id, *cpu_addr, size);
841 }
842 if (discard_downloads) {
843 IntervalType interval{*cpu_addr, size};
844 ClearDownload(interval);
845 discarded_ranges.subtract(interval);
846 }
847 return {&buffer, buffer.Offset(*cpu_addr)};
848}
849
850template <class P>
851void BufferCache<P>::FlushCachedWrites() { 813void BufferCache<P>::FlushCachedWrites() {
852 for (const BufferId buffer_id : cached_write_buffer_ids) { 814 for (const BufferId buffer_id : cached_write_buffer_ids) {
853 slot_buffers[buffer_id].FlushCachedWrites(); 815 slot_buffers[buffer_id].FlushCachedWrites();
@@ -862,6 +824,10 @@ bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
862 824
863template <class P> 825template <class P>
864void BufferCache<P>::AccumulateFlushes() { 826void BufferCache<P>::AccumulateFlushes() {
827 if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) {
828 uncommitted_ranges.clear();
829 return;
830 }
865 if (uncommitted_ranges.empty()) { 831 if (uncommitted_ranges.empty()) {
866 return; 832 return;
867 } 833 }
@@ -877,14 +843,12 @@ template <class P>
877void BufferCache<P>::CommitAsyncFlushesHigh() { 843void BufferCache<P>::CommitAsyncFlushesHigh() {
878 AccumulateFlushes(); 844 AccumulateFlushes();
879 845
880 for (const auto& interval : discarded_ranges) {
881 common_ranges.subtract(interval);
882 }
883
884 if (committed_ranges.empty()) { 846 if (committed_ranges.empty()) {
885 return; 847 return;
886 } 848 }
887 MICROPROFILE_SCOPE(GPU_DownloadMemory); 849 MICROPROFILE_SCOPE(GPU_DownloadMemory);
850 const bool is_accuracy_normal =
851 Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal;
888 852
889 auto it = committed_ranges.begin(); 853 auto it = committed_ranges.begin();
890 while (it != committed_ranges.end()) { 854 while (it != committed_ranges.end()) {
@@ -909,6 +873,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
909 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { 873 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
910 buffer.ForEachDownloadRangeAndClear( 874 buffer.ForEachDownloadRangeAndClear(
911 cpu_addr, size, [&](u64 range_offset, u64 range_size) { 875 cpu_addr, size, [&](u64 range_offset, u64 range_size) {
876 if (is_accuracy_normal) {
877 return;
878 }
912 const VAddr buffer_addr = buffer.CpuAddr(); 879 const VAddr buffer_addr = buffer.CpuAddr();
913 const auto add_download = [&](VAddr start, VAddr end) { 880 const auto add_download = [&](VAddr start, VAddr end) {
914 const u64 new_offset = start - buffer_addr; 881 const u64 new_offset = start - buffer_addr;
@@ -973,7 +940,12 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
973 940
974template <class P> 941template <class P>
975void BufferCache<P>::CommitAsyncFlushes() { 942void BufferCache<P>::CommitAsyncFlushes() {
976 CommitAsyncFlushesHigh(); 943 if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) {
944 CommitAsyncFlushesHigh();
945 } else {
946 uncommitted_ranges.clear();
947 committed_ranges.clear();
948 }
977} 949}
978 950
979template <class P> 951template <class P>
@@ -1353,7 +1325,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
1353 const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); 1325 const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
1354 const auto& index_array = draw_state.index_buffer; 1326 const auto& index_array = draw_state.index_buffer;
1355 auto& flags = maxwell3d->dirty.flags; 1327 auto& flags = maxwell3d->dirty.flags;
1356 if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) { 1328 if (!flags[Dirty::IndexBuffer]) {
1357 return; 1329 return;
1358 } 1330 }
1359 flags[Dirty::IndexBuffer] = false; 1331 flags[Dirty::IndexBuffer] = false;
@@ -1574,11 +1546,7 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s
1574 if (!is_async) { 1546 if (!is_async) {
1575 return; 1547 return;
1576 } 1548 }
1577 const bool is_high_accuracy = 1549 uncommitted_ranges.add(base_interval);
1578 Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High;
1579 if (is_high_accuracy) {
1580 uncommitted_ranges.add(base_interval);
1581 }
1582} 1550}
1583 1551
1584template <class P> 1552template <class P>
@@ -1772,51 +1740,6 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
1772} 1740}
1773 1741
1774template <class P> 1742template <class P>
1775bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) {
1776 boost::container::small_vector<BufferCopy, 4> copies;
1777 u64 total_size_bytes = 0;
1778 u64 largest_copy = 0;
1779 IntervalSet found_sets{};
1780 auto make_copies = [&] {
1781 for (auto& interval : found_sets) {
1782 const std::size_t sub_size = interval.upper() - interval.lower();
1783 const VAddr cpu_addr = interval.lower();
1784 copies.push_back(BufferCopy{
1785 .src_offset = total_size_bytes,
1786 .dst_offset = cpu_addr - buffer.CpuAddr(),
1787 .size = sub_size,
1788 });
1789 total_size_bytes += sub_size;
1790 largest_copy = std::max(largest_copy, sub_size);
1791 }
1792 const std::span<BufferCopy> copies_span(copies.data(), copies.size());
1793 UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
1794 };
1795 buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
1796 const VAddr base_adr = buffer.CpuAddr() + range_offset;
1797 const VAddr end_adr = base_adr + range_size;
1798 const IntervalType add_interval{base_adr, end_adr};
1799 found_sets.add(add_interval);
1800 });
1801 if (found_sets.empty()) {
1802 return true;
1803 }
1804 const IntervalType search_interval{cpu_addr, cpu_addr + size};
1805 auto it = common_ranges.lower_bound(search_interval);
1806 auto it_end = common_ranges.upper_bound(search_interval);
1807 if (it == common_ranges.end()) {
1808 make_copies();
1809 return false;
1810 }
1811 while (it != it_end) {
1812 found_sets.subtract(*it);
1813 it++;
1814 }
1815 make_copies();
1816 return false;
1817}
1818
1819template <class P>
1820void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, 1743void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
1821 std::span<BufferCopy> copies) { 1744 std::span<BufferCopy> copies) {
1822 if constexpr (USE_MEMORY_MAPS) { 1745 if constexpr (USE_MEMORY_MAPS) {
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 7a82355da..b3e9cb82e 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -77,11 +77,20 @@ bool DmaPusher::Step() {
77 command_headers.resize_destructive(command_list_header.size); 77 command_headers.resize_destructive(command_list_header.size);
78 constexpr u32 MacroRegistersStart = 0xE00; 78 constexpr u32 MacroRegistersStart = 0xE00;
79 if (dma_state.method < MacroRegistersStart) { 79 if (dma_state.method < MacroRegistersStart) {
80 memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(), 80 if (Settings::IsGPULevelHigh()) {
81 command_list_header.size * sizeof(u32)); 81 memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(),
82 command_list_header.size * sizeof(u32));
83 } else {
84 memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(),
85 command_list_header.size * sizeof(u32));
86 }
82 } else { 87 } else {
83 memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), 88 const size_t copy_size = command_list_header.size * sizeof(u32);
84 command_list_header.size * sizeof(u32)); 89 if (subchannels[dma_state.subchannel]) {
90 subchannels[dma_state.subchannel]->current_dirty =
91 memory_manager.IsMemoryDirty(dma_state.dma_get, copy_size);
92 }
93 memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), copy_size);
85 } 94 }
86 ProcessCommands(command_headers); 95 ProcessCommands(command_headers);
87 } 96 }
diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h
index 76630272d..38f1abdc4 100644
--- a/src/video_core/engines/engine_interface.h
+++ b/src/video_core/engines/engine_interface.h
@@ -18,6 +18,7 @@ public:
18 virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount, 18 virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
19 u32 methods_pending) = 0; 19 u32 methods_pending) = 0;
20 20
21 bool current_dirty{};
21 GPUVAddr current_dma_segment; 22 GPUVAddr current_dma_segment;
22}; 23};
23 24
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index a9fd6d960..bbe3202fe 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -4,6 +4,7 @@
4#include <cstring> 4#include <cstring>
5#include <optional> 5#include <optional>
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/settings.h"
7#include "core/core.h" 8#include "core/core.h"
8#include "core/core_timing.h" 9#include "core/core_timing.h"
9#include "video_core/dirty_flags.h" 10#include "video_core/dirty_flags.h"
@@ -14,6 +15,7 @@
14#include "video_core/rasterizer_interface.h" 15#include "video_core/rasterizer_interface.h"
15#include "video_core/textures/texture.h" 16#include "video_core/textures/texture.h"
16 17
18
17namespace Tegra::Engines { 19namespace Tegra::Engines {
18 20
19using VideoCore::QueryType; 21using VideoCore::QueryType;
@@ -134,6 +136,8 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool
134 macro_addresses.push_back(current_dma_segment + i * sizeof(u32)); 136 macro_addresses.push_back(current_dma_segment + i * sizeof(u32));
135 } 137 }
136 macro_segments.emplace_back(current_dma_segment, amount); 138 macro_segments.emplace_back(current_dma_segment, amount);
139 current_macro_dirty |= current_dirty;
140 current_dirty = false;
137 141
138 // Call the macro when there are no more parameters in the command buffer 142 // Call the macro when there are no more parameters in the command buffer
139 if (is_last_call) { 143 if (is_last_call) {
@@ -141,10 +145,14 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool
141 macro_params.clear(); 145 macro_params.clear();
142 macro_addresses.clear(); 146 macro_addresses.clear();
143 macro_segments.clear(); 147 macro_segments.clear();
148 current_macro_dirty = false;
144 } 149 }
145} 150}
146 151
147void Maxwell3D::RefreshParameters() { 152void Maxwell3D::RefreshParametersImpl() {
153 if (!Settings::IsGPULevelHigh()) {
154 return;
155 }
148 size_t current_index = 0; 156 size_t current_index = 0;
149 for (auto& segment : macro_segments) { 157 for (auto& segment : macro_segments) {
150 if (segment.first == 0) { 158 if (segment.first == 0) {
@@ -157,21 +165,6 @@ void Maxwell3D::RefreshParameters() {
157 } 165 }
158} 166}
159 167
160bool Maxwell3D::AnyParametersDirty() {
161 size_t current_index = 0;
162 for (auto& segment : macro_segments) {
163 if (segment.first == 0) {
164 current_index += segment.second;
165 continue;
166 }
167 if (memory_manager.IsMemoryDirty(segment.first, sizeof(u32) * segment.second)) {
168 return true;
169 }
170 current_index += segment.second;
171 }
172 return false;
173}
174
175u32 Maxwell3D::GetMaxCurrentVertices() { 168u32 Maxwell3D::GetMaxCurrentVertices() {
176 u32 num_vertices = 0; 169 u32 num_vertices = 0;
177 for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { 170 for (size_t index = 0; index < Regs::NumVertexArrays; ++index) {
@@ -332,7 +325,6 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
332 325
333 const u32 argument = ProcessShadowRam(method, method_argument); 326 const u32 argument = ProcessShadowRam(method, method_argument);
334 ProcessDirtyRegisters(method, argument); 327 ProcessDirtyRegisters(method, argument);
335
336 ProcessMethodCall(method, argument, method_argument, is_last_call); 328 ProcessMethodCall(method, argument, method_argument, is_last_call);
337} 329}
338 330
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index cd996413c..f0a379801 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -272,6 +272,7 @@ public:
272 }; 272 };
273 273
274 union { 274 union {
275 u32 raw;
275 BitField<0, 1, Mode> mode; 276 BitField<0, 1, Mode> mode;
276 BitField<4, 8, u32> pad; 277 BitField<4, 8, u32> pad;
277 }; 278 };
@@ -1217,10 +1218,12 @@ public:
1217 1218
1218 struct Window { 1219 struct Window {
1219 union { 1220 union {
1221 u32 raw_1;
1220 BitField<0, 16, u32> x_min; 1222 BitField<0, 16, u32> x_min;
1221 BitField<16, 16, u32> x_max; 1223 BitField<16, 16, u32> x_max;
1222 }; 1224 };
1223 union { 1225 union {
1226 u32 raw_2;
1224 BitField<0, 16, u32> y_min; 1227 BitField<0, 16, u32> y_min;
1225 BitField<16, 16, u32> y_max; 1228 BitField<16, 16, u32> y_max;
1226 }; 1229 };
@@ -3090,9 +3093,16 @@ public:
3090 return macro_addresses[index]; 3093 return macro_addresses[index];
3091 } 3094 }
3092 3095
3093 void RefreshParameters(); 3096 void RefreshParameters() {
3097 if (!current_macro_dirty) {
3098 return;
3099 }
3100 RefreshParametersImpl();
3101 }
3094 3102
3095 bool AnyParametersDirty(); 3103 bool AnyParametersDirty() {
3104 return current_macro_dirty;
3105 }
3096 3106
3097 u32 GetMaxCurrentVertices(); 3107 u32 GetMaxCurrentVertices();
3098 3108
@@ -3101,6 +3111,9 @@ public:
3101 /// Handles a write to the CLEAR_BUFFERS register. 3111 /// Handles a write to the CLEAR_BUFFERS register.
3102 void ProcessClearBuffers(u32 layer_count); 3112 void ProcessClearBuffers(u32 layer_count);
3103 3113
3114 /// Handles a write to the CB_BIND register.
3115 void ProcessCBBind(size_t stage_index);
3116
3104private: 3117private:
3105 void InitializeRegisterDefaults(); 3118 void InitializeRegisterDefaults();
3106 3119
@@ -3154,12 +3167,11 @@ private:
3154 void ProcessCBData(u32 value); 3167 void ProcessCBData(u32 value);
3155 void ProcessCBMultiData(const u32* start_base, u32 amount); 3168 void ProcessCBMultiData(const u32* start_base, u32 amount);
3156 3169
3157 /// Handles a write to the CB_BIND register.
3158 void ProcessCBBind(size_t stage_index);
3159
3160 /// Returns a query's value or an empty object if the value will be deferred through a cache. 3170 /// Returns a query's value or an empty object if the value will be deferred through a cache.
3161 std::optional<u64> GetQueryResult(); 3171 std::optional<u64> GetQueryResult();
3162 3172
3173 void RefreshParametersImpl();
3174
3163 Core::System& system; 3175 Core::System& system;
3164 MemoryManager& memory_manager; 3176 MemoryManager& memory_manager;
3165 3177
@@ -3187,6 +3199,7 @@ private:
3187 bool draw_indexed{}; 3199 bool draw_indexed{};
3188 std::vector<std::pair<GPUVAddr, size_t>> macro_segments; 3200 std::vector<std::pair<GPUVAddr, size_t>> macro_segments;
3189 std::vector<GPUVAddr> macro_addresses; 3201 std::vector<GPUVAddr> macro_addresses;
3202 bool current_macro_dirty{};
3190}; 3203};
3191 3204
3192#define ASSERT_REG_POSITION(field_name, position) \ 3205#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
index 01dd25f95..49c47dafe 100644
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -12,6 +12,7 @@
12#include "common/assert.h" 12#include "common/assert.h"
13#include "common/fs/fs.h" 13#include "common/fs/fs.h"
14#include "common/fs/path_util.h" 14#include "common/fs/path_util.h"
15#include "common/microprofile.h"
15#include "common/settings.h" 16#include "common/settings.h"
16#include "video_core/engines/maxwell_3d.h" 17#include "video_core/engines/maxwell_3d.h"
17#include "video_core/macro/macro.h" 18#include "video_core/macro/macro.h"
@@ -22,6 +23,8 @@
22#include "video_core/macro/macro_jit_x64.h" 23#include "video_core/macro/macro_jit_x64.h"
23#endif 24#endif
24 25
26MICROPROFILE_DEFINE(MacroHLE, "GPU", "Execute macro hle", MP_RGB(128, 192, 192));
27
25namespace Tegra { 28namespace Tegra {
26 29
27static void Dump(u64 hash, std::span<const u32> code) { 30static void Dump(u64 hash, std::span<const u32> code) {
@@ -60,6 +63,7 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
60 if (compiled_macro != macro_cache.end()) { 63 if (compiled_macro != macro_cache.end()) {
61 const auto& cache_info = compiled_macro->second; 64 const auto& cache_info = compiled_macro->second;
62 if (cache_info.has_hle_program) { 65 if (cache_info.has_hle_program) {
66 MICROPROFILE_SCOPE(MacroHLE);
63 cache_info.hle_program->Execute(parameters, method); 67 cache_info.hle_program->Execute(parameters, method);
64 } else { 68 } else {
65 maxwell3d.RefreshParameters(); 69 maxwell3d.RefreshParameters();
@@ -106,6 +110,7 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
106 if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) { 110 if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) {
107 cache_info.has_hle_program = true; 111 cache_info.has_hle_program = true;
108 cache_info.hle_program = std::move(hle_program); 112 cache_info.hle_program = std::move(hle_program);
113 MICROPROFILE_SCOPE(MacroHLE);
109 cache_info.hle_program->Execute(parameters, method); 114 cache_info.hle_program->Execute(parameters, method);
110 } else { 115 } else {
111 maxwell3d.RefreshParameters(); 116 maxwell3d.RefreshParameters();
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index 638247e55..3eac50975 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -86,7 +86,7 @@ public:
86 86
87 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { 87 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
88 auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]); 88 auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]);
89 if (!IsTopologySafe(topology)) { 89 if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) {
90 Fallback(parameters); 90 Fallback(parameters);
91 return; 91 return;
92 } 92 }
@@ -117,8 +117,8 @@ private:
117 void Fallback(const std::vector<u32>& parameters) { 117 void Fallback(const std::vector<u32>& parameters) {
118 SCOPE_EXIT({ 118 SCOPE_EXIT({
119 if (extended) { 119 if (extended) {
120 maxwell3d.CallMethod(0x8e3, 0x640, true); 120 maxwell3d.engine_state = Maxwell::EngineHint::None;
121 maxwell3d.CallMethod(0x8e4, 0, true); 121 maxwell3d.replace_table.clear();
122 } 122 }
123 }); 123 });
124 maxwell3d.RefreshParameters(); 124 maxwell3d.RefreshParameters();
@@ -127,7 +127,8 @@ private:
127 const u32 vertex_first = parameters[3]; 127 const u32 vertex_first = parameters[3];
128 const u32 vertex_count = parameters[1]; 128 const u32 vertex_count = parameters[1];
129 129
130 if (maxwell3d.GetMaxCurrentVertices() < vertex_first + vertex_count) { 130 if (maxwell3d.AnyParametersDirty() &&
131 maxwell3d.GetMaxCurrentVertices() < vertex_first + vertex_count) {
131 ASSERT_MSG(false, "Faulty draw!"); 132 ASSERT_MSG(false, "Faulty draw!");
132 return; 133 return;
133 } 134 }
@@ -157,7 +158,7 @@ public:
157 158
158 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { 159 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
159 auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]); 160 auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]);
160 if (!IsTopologySafe(topology)) { 161 if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) {
161 Fallback(parameters); 162 Fallback(parameters);
162 return; 163 return;
163 } 164 }
@@ -169,7 +170,11 @@ public:
169 } 170 }
170 const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); 171 const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize());
171 const u32 base_size = std::max<u32>(minimum_limit, estimate); 172 const u32 base_size = std::max<u32>(minimum_limit, estimate);
172 maxwell3d.regs.draw.topology.Assign(topology); 173 const u32 element_base = parameters[4];
174 const u32 base_instance = parameters[5];
175 maxwell3d.regs.vertex_id_base = element_base;
176 maxwell3d.regs.global_base_vertex_index = element_base;
177 maxwell3d.regs.global_base_instance_index = base_instance;
173 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; 178 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
174 maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; 179 maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
175 maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); 180 maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
@@ -186,6 +191,9 @@ public:
186 maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); 191 maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size);
187 maxwell3d.engine_state = Maxwell::EngineHint::None; 192 maxwell3d.engine_state = Maxwell::EngineHint::None;
188 maxwell3d.replace_table.clear(); 193 maxwell3d.replace_table.clear();
194 maxwell3d.regs.vertex_id_base = 0x0;
195 maxwell3d.regs.global_base_vertex_index = 0x0;
196 maxwell3d.regs.global_base_instance_index = 0x0;
189 } 197 }
190 198
191private: 199private:
@@ -195,6 +203,8 @@ private:
195 const u32 element_base = parameters[4]; 203 const u32 element_base = parameters[4];
196 const u32 base_instance = parameters[5]; 204 const u32 base_instance = parameters[5];
197 maxwell3d.regs.vertex_id_base = element_base; 205 maxwell3d.regs.vertex_id_base = element_base;
206 maxwell3d.regs.global_base_vertex_index = element_base;
207 maxwell3d.regs.global_base_instance_index = base_instance;
198 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; 208 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
199 maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; 209 maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
200 maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); 210 maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
@@ -205,6 +215,8 @@ private:
205 parameters[3], parameters[1], element_base, base_instance, instance_count); 215 parameters[3], parameters[1], element_base, base_instance, instance_count);
206 216
207 maxwell3d.regs.vertex_id_base = 0x0; 217 maxwell3d.regs.vertex_id_base = 0x0;
218 maxwell3d.regs.global_base_vertex_index = 0x0;
219 maxwell3d.regs.global_base_instance_index = 0x0;
208 maxwell3d.engine_state = Maxwell::EngineHint::None; 220 maxwell3d.engine_state = Maxwell::EngineHint::None;
209 maxwell3d.replace_table.clear(); 221 maxwell3d.replace_table.clear();
210 } 222 }
@@ -253,7 +265,6 @@ public:
253 return; 265 return;
254 } 266 }
255 267
256 maxwell3d.regs.draw.topology.Assign(topology);
257 const u32 padding = parameters[3]; // padding is in words 268 const u32 padding = parameters[3]; // padding is in words
258 269
259 // size of each indirect segment 270 // size of each indirect segment
@@ -335,6 +346,83 @@ private:
335 u32 minimum_limit{1 << 12}; 346 u32 minimum_limit{1 << 12};
336}; 347};
337 348
349class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl {
350public:
351 explicit HLE_C713C83D8F63CCF3(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
352
353 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
354 maxwell3d.RefreshParameters();
355 const u32 offset = (parameters[0] & 0x3FFFFFFF) << 2;
356 const u32 address = maxwell3d.regs.shadow_scratch[24];
357 auto& const_buffer = maxwell3d.regs.const_buffer;
358 const_buffer.size = 0x7000;
359 const_buffer.address_high = (address >> 24) & 0xFF;
360 const_buffer.address_low = address << 8;
361 const_buffer.offset = offset;
362 }
363};
364
365class HLE_D7333D26E0A93EDE final : public HLEMacroImpl {
366public:
367 explicit HLE_D7333D26E0A93EDE(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
368
369 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
370 maxwell3d.RefreshParameters();
371 const size_t index = parameters[0];
372 const u32 address = maxwell3d.regs.shadow_scratch[42 + index];
373 const u32 size = maxwell3d.regs.shadow_scratch[47 + index];
374 auto& const_buffer = maxwell3d.regs.const_buffer;
375 const_buffer.size = size;
376 const_buffer.address_high = (address >> 24) & 0xFF;
377 const_buffer.address_low = address << 8;
378 }
379};
380
381class HLE_BindShader final : public HLEMacroImpl {
382public:
383 explicit HLE_BindShader(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
384
385 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
386 maxwell3d.RefreshParameters();
387 auto& regs = maxwell3d.regs;
388 const u32 index = parameters[0];
389 if ((parameters[1] - regs.shadow_scratch[28 + index]) == 0) {
390 return;
391 }
392
393 regs.pipelines[index & 0xF].offset = parameters[2];
394 maxwell3d.dirty.flags[VideoCommon::Dirty::Shaders] = true;
395 regs.shadow_scratch[28 + index] = parameters[1];
396 regs.shadow_scratch[34 + index] = parameters[2];
397
398 const u32 address = parameters[4];
399 auto& const_buffer = regs.const_buffer;
400 const_buffer.size = 0x10000;
401 const_buffer.address_high = (address >> 24) & 0xFF;
402 const_buffer.address_low = address << 8;
403
404 const size_t bind_group_id = parameters[3] & 0x7F;
405 auto& bind_group = regs.bind_groups[bind_group_id];
406 bind_group.raw_config = 0x11;
407 maxwell3d.ProcessCBBind(bind_group_id);
408 }
409};
410
411class HLE_SetRasterBoundingBox final : public HLEMacroImpl {
412public:
413 explicit HLE_SetRasterBoundingBox(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
414
415 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
416 maxwell3d.RefreshParameters();
417 const u32 raster_mode = parameters[0];
418 auto& regs = maxwell3d.regs;
419 const u32 raster_enabled = maxwell3d.regs.conservative_raster_enable;
420 const u32 scratch_data = maxwell3d.regs.shadow_scratch[52];
421 regs.raster_bounding_box.raw = raster_mode & 0xFFFFF00F;
422 regs.raster_bounding_box.pad.Assign(scratch_data & raster_enabled);
423 }
424};
425
338} // Anonymous namespace 426} // Anonymous namespace
339 427
340HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { 428HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
@@ -368,6 +456,26 @@ HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
368 [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> { 456 [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> {
369 return std::make_unique<HLE_MultiLayerClear>(maxwell3d); 457 return std::make_unique<HLE_MultiLayerClear>(maxwell3d);
370 })); 458 }));
459 builders.emplace(0xC713C83D8F63CCF3ULL,
460 std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
461 [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> {
462 return std::make_unique<HLE_C713C83D8F63CCF3>(maxwell3d);
463 }));
464 builders.emplace(0xD7333D26E0A93EDEULL,
465 std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
466 [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> {
467 return std::make_unique<HLE_D7333D26E0A93EDE>(maxwell3d);
468 }));
469 builders.emplace(0xEB29B2A09AA06D38ULL,
470 std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
471 [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> {
472 return std::make_unique<HLE_BindShader>(maxwell3d);
473 }));
474 builders.emplace(0xDB1341DBEB4C8AF7ULL,
475 std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>(
476 [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> {
477 return std::make_unique<HLE_SetRasterBoundingBox>(maxwell3d);
478 }));
371} 479}
372 480
373HLEMacro::~HLEMacro() = default; 481HLEMacro::~HLEMacro() = default;