diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 113 | ||||
| -rw-r--r-- | src/video_core/dma_pusher.cpp | 17 | ||||
| -rw-r--r-- | src/video_core/engines/engine_interface.h | 1 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 26 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 23 | ||||
| -rw-r--r-- | src/video_core/macro/macro.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/macro/macro_hle.cpp | 122 |
7 files changed, 179 insertions, 128 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 98343628c..f86edaa3e 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -170,11 +170,6 @@ public: | |||
| 170 | void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, | 170 | void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, |
| 171 | bool is_written, bool is_image); | 171 | bool is_written, bool is_image); |
| 172 | 172 | ||
| 173 | [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size, | ||
| 174 | bool synchronize = true, | ||
| 175 | bool mark_as_written = false, | ||
| 176 | bool discard_downloads = false); | ||
| 177 | |||
| 178 | void FlushCachedWrites(); | 173 | void FlushCachedWrites(); |
| 179 | 174 | ||
| 180 | /// Return true when there are uncommitted buffers to be downloaded | 175 | /// Return true when there are uncommitted buffers to be downloaded |
| @@ -354,8 +349,6 @@ private: | |||
| 354 | 349 | ||
| 355 | bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); | 350 | bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); |
| 356 | 351 | ||
| 357 | bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 358 | |||
| 359 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | 352 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, |
| 360 | std::span<BufferCopy> copies); | 353 | std::span<BufferCopy> copies); |
| 361 | 354 | ||
| @@ -442,7 +435,6 @@ private: | |||
| 442 | 435 | ||
| 443 | std::vector<BufferId> cached_write_buffer_ids; | 436 | std::vector<BufferId> cached_write_buffer_ids; |
| 444 | 437 | ||
| 445 | IntervalSet discarded_ranges; | ||
| 446 | IntervalSet uncommitted_ranges; | 438 | IntervalSet uncommitted_ranges; |
| 447 | IntervalSet common_ranges; | 439 | IntervalSet common_ranges; |
| 448 | std::deque<IntervalSet> committed_ranges; | 440 | std::deque<IntervalSet> committed_ranges; |
| @@ -600,17 +592,13 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 600 | }}; | 592 | }}; |
| 601 | 593 | ||
| 602 | boost::container::small_vector<IntervalType, 4> tmp_intervals; | 594 | boost::container::small_vector<IntervalType, 4> tmp_intervals; |
| 603 | const bool is_high_accuracy = | ||
| 604 | Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; | ||
| 605 | auto mirror = [&](VAddr base_address, VAddr base_address_end) { | 595 | auto mirror = [&](VAddr base_address, VAddr base_address_end) { |
| 606 | const u64 size = base_address_end - base_address; | 596 | const u64 size = base_address_end - base_address; |
| 607 | const VAddr diff = base_address - *cpu_src_address; | 597 | const VAddr diff = base_address - *cpu_src_address; |
| 608 | const VAddr new_base_address = *cpu_dest_address + diff; | 598 | const VAddr new_base_address = *cpu_dest_address + diff; |
| 609 | const IntervalType add_interval{new_base_address, new_base_address + size}; | 599 | const IntervalType add_interval{new_base_address, new_base_address + size}; |
| 600 | uncommitted_ranges.add(add_interval); | ||
| 610 | tmp_intervals.push_back(add_interval); | 601 | tmp_intervals.push_back(add_interval); |
| 611 | if (is_high_accuracy) { | ||
| 612 | uncommitted_ranges.add(add_interval); | ||
| 613 | } | ||
| 614 | }; | 602 | }; |
| 615 | ForEachWrittenRange(*cpu_src_address, amount, mirror); | 603 | ForEachWrittenRange(*cpu_src_address, amount, mirror); |
| 616 | // This subtraction in this order is important for overlapping copies. | 604 | // This subtraction in this order is important for overlapping copies. |
| @@ -822,32 +810,6 @@ void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_add | |||
| 822 | } | 810 | } |
| 823 | 811 | ||
| 824 | template <class P> | 812 | template <class P> |
| 825 | std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size, | ||
| 826 | bool synchronize, | ||
| 827 | bool mark_as_written, | ||
| 828 | bool discard_downloads) { | ||
| 829 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | ||
| 830 | if (!cpu_addr) { | ||
| 831 | return {&slot_buffers[NULL_BUFFER_ID], 0}; | ||
| 832 | } | ||
| 833 | const BufferId buffer_id = FindBuffer(*cpu_addr, size); | ||
| 834 | Buffer& buffer = slot_buffers[buffer_id]; | ||
| 835 | if (synchronize) { | ||
| 836 | // SynchronizeBuffer(buffer, *cpu_addr, size); | ||
| 837 | SynchronizeBufferNoModified(buffer, *cpu_addr, size); | ||
| 838 | } | ||
| 839 | if (mark_as_written) { | ||
| 840 | MarkWrittenBuffer(buffer_id, *cpu_addr, size); | ||
| 841 | } | ||
| 842 | if (discard_downloads) { | ||
| 843 | IntervalType interval{*cpu_addr, size}; | ||
| 844 | ClearDownload(interval); | ||
| 845 | discarded_ranges.subtract(interval); | ||
| 846 | } | ||
| 847 | return {&buffer, buffer.Offset(*cpu_addr)}; | ||
| 848 | } | ||
| 849 | |||
| 850 | template <class P> | ||
| 851 | void BufferCache<P>::FlushCachedWrites() { | 813 | void BufferCache<P>::FlushCachedWrites() { |
| 852 | for (const BufferId buffer_id : cached_write_buffer_ids) { | 814 | for (const BufferId buffer_id : cached_write_buffer_ids) { |
| 853 | slot_buffers[buffer_id].FlushCachedWrites(); | 815 | slot_buffers[buffer_id].FlushCachedWrites(); |
| @@ -862,6 +824,10 @@ bool BufferCache<P>::HasUncommittedFlushes() const noexcept { | |||
| 862 | 824 | ||
| 863 | template <class P> | 825 | template <class P> |
| 864 | void BufferCache<P>::AccumulateFlushes() { | 826 | void BufferCache<P>::AccumulateFlushes() { |
| 827 | if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) { | ||
| 828 | uncommitted_ranges.clear(); | ||
| 829 | return; | ||
| 830 | } | ||
| 865 | if (uncommitted_ranges.empty()) { | 831 | if (uncommitted_ranges.empty()) { |
| 866 | return; | 832 | return; |
| 867 | } | 833 | } |
| @@ -877,14 +843,12 @@ template <class P> | |||
| 877 | void BufferCache<P>::CommitAsyncFlushesHigh() { | 843 | void BufferCache<P>::CommitAsyncFlushesHigh() { |
| 878 | AccumulateFlushes(); | 844 | AccumulateFlushes(); |
| 879 | 845 | ||
| 880 | for (const auto& interval : discarded_ranges) { | ||
| 881 | common_ranges.subtract(interval); | ||
| 882 | } | ||
| 883 | |||
| 884 | if (committed_ranges.empty()) { | 846 | if (committed_ranges.empty()) { |
| 885 | return; | 847 | return; |
| 886 | } | 848 | } |
| 887 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | 849 | MICROPROFILE_SCOPE(GPU_DownloadMemory); |
| 850 | const bool is_accuracy_normal = | ||
| 851 | Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal; | ||
| 888 | 852 | ||
| 889 | auto it = committed_ranges.begin(); | 853 | auto it = committed_ranges.begin(); |
| 890 | while (it != committed_ranges.end()) { | 854 | while (it != committed_ranges.end()) { |
| @@ -909,6 +873,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 909 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { | 873 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { |
| 910 | buffer.ForEachDownloadRangeAndClear( | 874 | buffer.ForEachDownloadRangeAndClear( |
| 911 | cpu_addr, size, [&](u64 range_offset, u64 range_size) { | 875 | cpu_addr, size, [&](u64 range_offset, u64 range_size) { |
| 876 | if (is_accuracy_normal) { | ||
| 877 | return; | ||
| 878 | } | ||
| 912 | const VAddr buffer_addr = buffer.CpuAddr(); | 879 | const VAddr buffer_addr = buffer.CpuAddr(); |
| 913 | const auto add_download = [&](VAddr start, VAddr end) { | 880 | const auto add_download = [&](VAddr start, VAddr end) { |
| 914 | const u64 new_offset = start - buffer_addr; | 881 | const u64 new_offset = start - buffer_addr; |
| @@ -973,7 +940,12 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 973 | 940 | ||
| 974 | template <class P> | 941 | template <class P> |
| 975 | void BufferCache<P>::CommitAsyncFlushes() { | 942 | void BufferCache<P>::CommitAsyncFlushes() { |
| 976 | CommitAsyncFlushesHigh(); | 943 | if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) { |
| 944 | CommitAsyncFlushesHigh(); | ||
| 945 | } else { | ||
| 946 | uncommitted_ranges.clear(); | ||
| 947 | committed_ranges.clear(); | ||
| 948 | } | ||
| 977 | } | 949 | } |
| 978 | 950 | ||
| 979 | template <class P> | 951 | template <class P> |
| @@ -1353,7 +1325,7 @@ void BufferCache<P>::UpdateIndexBuffer() { | |||
| 1353 | const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); | 1325 | const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); |
| 1354 | const auto& index_array = draw_state.index_buffer; | 1326 | const auto& index_array = draw_state.index_buffer; |
| 1355 | auto& flags = maxwell3d->dirty.flags; | 1327 | auto& flags = maxwell3d->dirty.flags; |
| 1356 | if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) { | 1328 | if (!flags[Dirty::IndexBuffer]) { |
| 1357 | return; | 1329 | return; |
| 1358 | } | 1330 | } |
| 1359 | flags[Dirty::IndexBuffer] = false; | 1331 | flags[Dirty::IndexBuffer] = false; |
| @@ -1574,11 +1546,7 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s | |||
| 1574 | if (!is_async) { | 1546 | if (!is_async) { |
| 1575 | return; | 1547 | return; |
| 1576 | } | 1548 | } |
| 1577 | const bool is_high_accuracy = | 1549 | uncommitted_ranges.add(base_interval); |
| 1578 | Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; | ||
| 1579 | if (is_high_accuracy) { | ||
| 1580 | uncommitted_ranges.add(base_interval); | ||
| 1581 | } | ||
| 1582 | } | 1550 | } |
| 1583 | 1551 | ||
| 1584 | template <class P> | 1552 | template <class P> |
| @@ -1772,51 +1740,6 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s | |||
| 1772 | } | 1740 | } |
| 1773 | 1741 | ||
| 1774 | template <class P> | 1742 | template <class P> |
| 1775 | bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) { | ||
| 1776 | boost::container::small_vector<BufferCopy, 4> copies; | ||
| 1777 | u64 total_size_bytes = 0; | ||
| 1778 | u64 largest_copy = 0; | ||
| 1779 | IntervalSet found_sets{}; | ||
| 1780 | auto make_copies = [&] { | ||
| 1781 | for (auto& interval : found_sets) { | ||
| 1782 | const std::size_t sub_size = interval.upper() - interval.lower(); | ||
| 1783 | const VAddr cpu_addr = interval.lower(); | ||
| 1784 | copies.push_back(BufferCopy{ | ||
| 1785 | .src_offset = total_size_bytes, | ||
| 1786 | .dst_offset = cpu_addr - buffer.CpuAddr(), | ||
| 1787 | .size = sub_size, | ||
| 1788 | }); | ||
| 1789 | total_size_bytes += sub_size; | ||
| 1790 | largest_copy = std::max(largest_copy, sub_size); | ||
| 1791 | } | ||
| 1792 | const std::span<BufferCopy> copies_span(copies.data(), copies.size()); | ||
| 1793 | UploadMemory(buffer, total_size_bytes, largest_copy, copies_span); | ||
| 1794 | }; | ||
| 1795 | buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { | ||
| 1796 | const VAddr base_adr = buffer.CpuAddr() + range_offset; | ||
| 1797 | const VAddr end_adr = base_adr + range_size; | ||
| 1798 | const IntervalType add_interval{base_adr, end_adr}; | ||
| 1799 | found_sets.add(add_interval); | ||
| 1800 | }); | ||
| 1801 | if (found_sets.empty()) { | ||
| 1802 | return true; | ||
| 1803 | } | ||
| 1804 | const IntervalType search_interval{cpu_addr, cpu_addr + size}; | ||
| 1805 | auto it = common_ranges.lower_bound(search_interval); | ||
| 1806 | auto it_end = common_ranges.upper_bound(search_interval); | ||
| 1807 | if (it == common_ranges.end()) { | ||
| 1808 | make_copies(); | ||
| 1809 | return false; | ||
| 1810 | } | ||
| 1811 | while (it != it_end) { | ||
| 1812 | found_sets.subtract(*it); | ||
| 1813 | it++; | ||
| 1814 | } | ||
| 1815 | make_copies(); | ||
| 1816 | return false; | ||
| 1817 | } | ||
| 1818 | |||
| 1819 | template <class P> | ||
| 1820 | void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | 1743 | void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, |
| 1821 | std::span<BufferCopy> copies) { | 1744 | std::span<BufferCopy> copies) { |
| 1822 | if constexpr (USE_MEMORY_MAPS) { | 1745 | if constexpr (USE_MEMORY_MAPS) { |
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 7a82355da..b3e9cb82e 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -77,11 +77,20 @@ bool DmaPusher::Step() { | |||
| 77 | command_headers.resize_destructive(command_list_header.size); | 77 | command_headers.resize_destructive(command_list_header.size); |
| 78 | constexpr u32 MacroRegistersStart = 0xE00; | 78 | constexpr u32 MacroRegistersStart = 0xE00; |
| 79 | if (dma_state.method < MacroRegistersStart) { | 79 | if (dma_state.method < MacroRegistersStart) { |
| 80 | memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(), | 80 | if (Settings::IsGPULevelHigh()) { |
| 81 | command_list_header.size * sizeof(u32)); | 81 | memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(), |
| 82 | command_list_header.size * sizeof(u32)); | ||
| 83 | } else { | ||
| 84 | memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), | ||
| 85 | command_list_header.size * sizeof(u32)); | ||
| 86 | } | ||
| 82 | } else { | 87 | } else { |
| 83 | memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), | 88 | const size_t copy_size = command_list_header.size * sizeof(u32); |
| 84 | command_list_header.size * sizeof(u32)); | 89 | if (subchannels[dma_state.subchannel]) { |
| 90 | subchannels[dma_state.subchannel]->current_dirty = | ||
| 91 | memory_manager.IsMemoryDirty(dma_state.dma_get, copy_size); | ||
| 92 | } | ||
| 93 | memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), copy_size); | ||
| 85 | } | 94 | } |
| 86 | ProcessCommands(command_headers); | 95 | ProcessCommands(command_headers); |
| 87 | } | 96 | } |
diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h index 76630272d..38f1abdc4 100644 --- a/src/video_core/engines/engine_interface.h +++ b/src/video_core/engines/engine_interface.h | |||
| @@ -18,6 +18,7 @@ public: | |||
| 18 | virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount, | 18 | virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount, |
| 19 | u32 methods_pending) = 0; | 19 | u32 methods_pending) = 0; |
| 20 | 20 | ||
| 21 | bool current_dirty{}; | ||
| 21 | GPUVAddr current_dma_segment; | 22 | GPUVAddr current_dma_segment; |
| 22 | }; | 23 | }; |
| 23 | 24 | ||
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index a9fd6d960..bbe3202fe 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | #include <cstring> | 4 | #include <cstring> |
| 5 | #include <optional> | 5 | #include <optional> |
| 6 | #include "common/assert.h" | 6 | #include "common/assert.h" |
| 7 | #include "common/settings.h" | ||
| 7 | #include "core/core.h" | 8 | #include "core/core.h" |
| 8 | #include "core/core_timing.h" | 9 | #include "core/core_timing.h" |
| 9 | #include "video_core/dirty_flags.h" | 10 | #include "video_core/dirty_flags.h" |
| @@ -14,6 +15,7 @@ | |||
| 14 | #include "video_core/rasterizer_interface.h" | 15 | #include "video_core/rasterizer_interface.h" |
| 15 | #include "video_core/textures/texture.h" | 16 | #include "video_core/textures/texture.h" |
| 16 | 17 | ||
| 18 | |||
| 17 | namespace Tegra::Engines { | 19 | namespace Tegra::Engines { |
| 18 | 20 | ||
| 19 | using VideoCore::QueryType; | 21 | using VideoCore::QueryType; |
| @@ -134,6 +136,8 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool | |||
| 134 | macro_addresses.push_back(current_dma_segment + i * sizeof(u32)); | 136 | macro_addresses.push_back(current_dma_segment + i * sizeof(u32)); |
| 135 | } | 137 | } |
| 136 | macro_segments.emplace_back(current_dma_segment, amount); | 138 | macro_segments.emplace_back(current_dma_segment, amount); |
| 139 | current_macro_dirty |= current_dirty; | ||
| 140 | current_dirty = false; | ||
| 137 | 141 | ||
| 138 | // Call the macro when there are no more parameters in the command buffer | 142 | // Call the macro when there are no more parameters in the command buffer |
| 139 | if (is_last_call) { | 143 | if (is_last_call) { |
| @@ -141,10 +145,14 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool | |||
| 141 | macro_params.clear(); | 145 | macro_params.clear(); |
| 142 | macro_addresses.clear(); | 146 | macro_addresses.clear(); |
| 143 | macro_segments.clear(); | 147 | macro_segments.clear(); |
| 148 | current_macro_dirty = false; | ||
| 144 | } | 149 | } |
| 145 | } | 150 | } |
| 146 | 151 | ||
| 147 | void Maxwell3D::RefreshParameters() { | 152 | void Maxwell3D::RefreshParametersImpl() { |
| 153 | if (!Settings::IsGPULevelHigh()) { | ||
| 154 | return; | ||
| 155 | } | ||
| 148 | size_t current_index = 0; | 156 | size_t current_index = 0; |
| 149 | for (auto& segment : macro_segments) { | 157 | for (auto& segment : macro_segments) { |
| 150 | if (segment.first == 0) { | 158 | if (segment.first == 0) { |
| @@ -157,21 +165,6 @@ void Maxwell3D::RefreshParameters() { | |||
| 157 | } | 165 | } |
| 158 | } | 166 | } |
| 159 | 167 | ||
| 160 | bool Maxwell3D::AnyParametersDirty() { | ||
| 161 | size_t current_index = 0; | ||
| 162 | for (auto& segment : macro_segments) { | ||
| 163 | if (segment.first == 0) { | ||
| 164 | current_index += segment.second; | ||
| 165 | continue; | ||
| 166 | } | ||
| 167 | if (memory_manager.IsMemoryDirty(segment.first, sizeof(u32) * segment.second)) { | ||
| 168 | return true; | ||
| 169 | } | ||
| 170 | current_index += segment.second; | ||
| 171 | } | ||
| 172 | return false; | ||
| 173 | } | ||
| 174 | |||
| 175 | u32 Maxwell3D::GetMaxCurrentVertices() { | 168 | u32 Maxwell3D::GetMaxCurrentVertices() { |
| 176 | u32 num_vertices = 0; | 169 | u32 num_vertices = 0; |
| 177 | for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { | 170 | for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { |
| @@ -332,7 +325,6 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | |||
| 332 | 325 | ||
| 333 | const u32 argument = ProcessShadowRam(method, method_argument); | 326 | const u32 argument = ProcessShadowRam(method, method_argument); |
| 334 | ProcessDirtyRegisters(method, argument); | 327 | ProcessDirtyRegisters(method, argument); |
| 335 | |||
| 336 | ProcessMethodCall(method, argument, method_argument, is_last_call); | 328 | ProcessMethodCall(method, argument, method_argument, is_last_call); |
| 337 | } | 329 | } |
| 338 | 330 | ||
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index cd996413c..f0a379801 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -272,6 +272,7 @@ public: | |||
| 272 | }; | 272 | }; |
| 273 | 273 | ||
| 274 | union { | 274 | union { |
| 275 | u32 raw; | ||
| 275 | BitField<0, 1, Mode> mode; | 276 | BitField<0, 1, Mode> mode; |
| 276 | BitField<4, 8, u32> pad; | 277 | BitField<4, 8, u32> pad; |
| 277 | }; | 278 | }; |
| @@ -1217,10 +1218,12 @@ public: | |||
| 1217 | 1218 | ||
| 1218 | struct Window { | 1219 | struct Window { |
| 1219 | union { | 1220 | union { |
| 1221 | u32 raw_1; | ||
| 1220 | BitField<0, 16, u32> x_min; | 1222 | BitField<0, 16, u32> x_min; |
| 1221 | BitField<16, 16, u32> x_max; | 1223 | BitField<16, 16, u32> x_max; |
| 1222 | }; | 1224 | }; |
| 1223 | union { | 1225 | union { |
| 1226 | u32 raw_2; | ||
| 1224 | BitField<0, 16, u32> y_min; | 1227 | BitField<0, 16, u32> y_min; |
| 1225 | BitField<16, 16, u32> y_max; | 1228 | BitField<16, 16, u32> y_max; |
| 1226 | }; | 1229 | }; |
| @@ -3090,9 +3093,16 @@ public: | |||
| 3090 | return macro_addresses[index]; | 3093 | return macro_addresses[index]; |
| 3091 | } | 3094 | } |
| 3092 | 3095 | ||
| 3093 | void RefreshParameters(); | 3096 | void RefreshParameters() { |
| 3097 | if (!current_macro_dirty) { | ||
| 3098 | return; | ||
| 3099 | } | ||
| 3100 | RefreshParametersImpl(); | ||
| 3101 | } | ||
| 3094 | 3102 | ||
| 3095 | bool AnyParametersDirty(); | 3103 | bool AnyParametersDirty() { |
| 3104 | return current_macro_dirty; | ||
| 3105 | } | ||
| 3096 | 3106 | ||
| 3097 | u32 GetMaxCurrentVertices(); | 3107 | u32 GetMaxCurrentVertices(); |
| 3098 | 3108 | ||
| @@ -3101,6 +3111,9 @@ public: | |||
| 3101 | /// Handles a write to the CLEAR_BUFFERS register. | 3111 | /// Handles a write to the CLEAR_BUFFERS register. |
| 3102 | void ProcessClearBuffers(u32 layer_count); | 3112 | void ProcessClearBuffers(u32 layer_count); |
| 3103 | 3113 | ||
| 3114 | /// Handles a write to the CB_BIND register. | ||
| 3115 | void ProcessCBBind(size_t stage_index); | ||
| 3116 | |||
| 3104 | private: | 3117 | private: |
| 3105 | void InitializeRegisterDefaults(); | 3118 | void InitializeRegisterDefaults(); |
| 3106 | 3119 | ||
| @@ -3154,12 +3167,11 @@ private: | |||
| 3154 | void ProcessCBData(u32 value); | 3167 | void ProcessCBData(u32 value); |
| 3155 | void ProcessCBMultiData(const u32* start_base, u32 amount); | 3168 | void ProcessCBMultiData(const u32* start_base, u32 amount); |
| 3156 | 3169 | ||
| 3157 | /// Handles a write to the CB_BIND register. | ||
| 3158 | void ProcessCBBind(size_t stage_index); | ||
| 3159 | |||
| 3160 | /// Returns a query's value or an empty object if the value will be deferred through a cache. | 3170 | /// Returns a query's value or an empty object if the value will be deferred through a cache. |
| 3161 | std::optional<u64> GetQueryResult(); | 3171 | std::optional<u64> GetQueryResult(); |
| 3162 | 3172 | ||
| 3173 | void RefreshParametersImpl(); | ||
| 3174 | |||
| 3163 | Core::System& system; | 3175 | Core::System& system; |
| 3164 | MemoryManager& memory_manager; | 3176 | MemoryManager& memory_manager; |
| 3165 | 3177 | ||
| @@ -3187,6 +3199,7 @@ private: | |||
| 3187 | bool draw_indexed{}; | 3199 | bool draw_indexed{}; |
| 3188 | std::vector<std::pair<GPUVAddr, size_t>> macro_segments; | 3200 | std::vector<std::pair<GPUVAddr, size_t>> macro_segments; |
| 3189 | std::vector<GPUVAddr> macro_addresses; | 3201 | std::vector<GPUVAddr> macro_addresses; |
| 3202 | bool current_macro_dirty{}; | ||
| 3190 | }; | 3203 | }; |
| 3191 | 3204 | ||
| 3192 | #define ASSERT_REG_POSITION(field_name, position) \ | 3205 | #define ASSERT_REG_POSITION(field_name, position) \ |
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index 01dd25f95..49c47dafe 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| 13 | #include "common/fs/fs.h" | 13 | #include "common/fs/fs.h" |
| 14 | #include "common/fs/path_util.h" | 14 | #include "common/fs/path_util.h" |
| 15 | #include "common/microprofile.h" | ||
| 15 | #include "common/settings.h" | 16 | #include "common/settings.h" |
| 16 | #include "video_core/engines/maxwell_3d.h" | 17 | #include "video_core/engines/maxwell_3d.h" |
| 17 | #include "video_core/macro/macro.h" | 18 | #include "video_core/macro/macro.h" |
| @@ -22,6 +23,8 @@ | |||
| 22 | #include "video_core/macro/macro_jit_x64.h" | 23 | #include "video_core/macro/macro_jit_x64.h" |
| 23 | #endif | 24 | #endif |
| 24 | 25 | ||
| 26 | MICROPROFILE_DEFINE(MacroHLE, "GPU", "Execute macro hle", MP_RGB(128, 192, 192)); | ||
| 27 | |||
| 25 | namespace Tegra { | 28 | namespace Tegra { |
| 26 | 29 | ||
| 27 | static void Dump(u64 hash, std::span<const u32> code) { | 30 | static void Dump(u64 hash, std::span<const u32> code) { |
| @@ -60,6 +63,7 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { | |||
| 60 | if (compiled_macro != macro_cache.end()) { | 63 | if (compiled_macro != macro_cache.end()) { |
| 61 | const auto& cache_info = compiled_macro->second; | 64 | const auto& cache_info = compiled_macro->second; |
| 62 | if (cache_info.has_hle_program) { | 65 | if (cache_info.has_hle_program) { |
| 66 | MICROPROFILE_SCOPE(MacroHLE); | ||
| 63 | cache_info.hle_program->Execute(parameters, method); | 67 | cache_info.hle_program->Execute(parameters, method); |
| 64 | } else { | 68 | } else { |
| 65 | maxwell3d.RefreshParameters(); | 69 | maxwell3d.RefreshParameters(); |
| @@ -106,6 +110,7 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { | |||
| 106 | if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) { | 110 | if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) { |
| 107 | cache_info.has_hle_program = true; | 111 | cache_info.has_hle_program = true; |
| 108 | cache_info.hle_program = std::move(hle_program); | 112 | cache_info.hle_program = std::move(hle_program); |
| 113 | MICROPROFILE_SCOPE(MacroHLE); | ||
| 109 | cache_info.hle_program->Execute(parameters, method); | 114 | cache_info.hle_program->Execute(parameters, method); |
| 110 | } else { | 115 | } else { |
| 111 | maxwell3d.RefreshParameters(); | 116 | maxwell3d.RefreshParameters(); |
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 638247e55..3eac50975 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp | |||
| @@ -86,7 +86,7 @@ public: | |||
| 86 | 86 | ||
| 87 | void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | 87 | void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
| 88 | auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]); | 88 | auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]); |
| 89 | if (!IsTopologySafe(topology)) { | 89 | if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { |
| 90 | Fallback(parameters); | 90 | Fallback(parameters); |
| 91 | return; | 91 | return; |
| 92 | } | 92 | } |
| @@ -117,8 +117,8 @@ private: | |||
| 117 | void Fallback(const std::vector<u32>& parameters) { | 117 | void Fallback(const std::vector<u32>& parameters) { |
| 118 | SCOPE_EXIT({ | 118 | SCOPE_EXIT({ |
| 119 | if (extended) { | 119 | if (extended) { |
| 120 | maxwell3d.CallMethod(0x8e3, 0x640, true); | 120 | maxwell3d.engine_state = Maxwell::EngineHint::None; |
| 121 | maxwell3d.CallMethod(0x8e4, 0, true); | 121 | maxwell3d.replace_table.clear(); |
| 122 | } | 122 | } |
| 123 | }); | 123 | }); |
| 124 | maxwell3d.RefreshParameters(); | 124 | maxwell3d.RefreshParameters(); |
| @@ -127,7 +127,8 @@ private: | |||
| 127 | const u32 vertex_first = parameters[3]; | 127 | const u32 vertex_first = parameters[3]; |
| 128 | const u32 vertex_count = parameters[1]; | 128 | const u32 vertex_count = parameters[1]; |
| 129 | 129 | ||
| 130 | if (maxwell3d.GetMaxCurrentVertices() < vertex_first + vertex_count) { | 130 | if (maxwell3d.AnyParametersDirty() && |
| 131 | maxwell3d.GetMaxCurrentVertices() < vertex_first + vertex_count) { | ||
| 131 | ASSERT_MSG(false, "Faulty draw!"); | 132 | ASSERT_MSG(false, "Faulty draw!"); |
| 132 | return; | 133 | return; |
| 133 | } | 134 | } |
| @@ -157,7 +158,7 @@ public: | |||
| 157 | 158 | ||
| 158 | void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | 159 | void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { |
| 159 | auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]); | 160 | auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]); |
| 160 | if (!IsTopologySafe(topology)) { | 161 | if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { |
| 161 | Fallback(parameters); | 162 | Fallback(parameters); |
| 162 | return; | 163 | return; |
| 163 | } | 164 | } |
| @@ -169,7 +170,11 @@ public: | |||
| 169 | } | 170 | } |
| 170 | const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); | 171 | const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); |
| 171 | const u32 base_size = std::max<u32>(minimum_limit, estimate); | 172 | const u32 base_size = std::max<u32>(minimum_limit, estimate); |
| 172 | maxwell3d.regs.draw.topology.Assign(topology); | 173 | const u32 element_base = parameters[4]; |
| 174 | const u32 base_instance = parameters[5]; | ||
| 175 | maxwell3d.regs.vertex_id_base = element_base; | ||
| 176 | maxwell3d.regs.global_base_vertex_index = element_base; | ||
| 177 | maxwell3d.regs.global_base_instance_index = base_instance; | ||
| 173 | maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | 178 | maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |
| 174 | maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; | 179 | maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; |
| 175 | maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); | 180 | maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); |
| @@ -186,6 +191,9 @@ public: | |||
| 186 | maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); | 191 | maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); |
| 187 | maxwell3d.engine_state = Maxwell::EngineHint::None; | 192 | maxwell3d.engine_state = Maxwell::EngineHint::None; |
| 188 | maxwell3d.replace_table.clear(); | 193 | maxwell3d.replace_table.clear(); |
| 194 | maxwell3d.regs.vertex_id_base = 0x0; | ||
| 195 | maxwell3d.regs.global_base_vertex_index = 0x0; | ||
| 196 | maxwell3d.regs.global_base_instance_index = 0x0; | ||
| 189 | } | 197 | } |
| 190 | 198 | ||
| 191 | private: | 199 | private: |
| @@ -195,6 +203,8 @@ private: | |||
| 195 | const u32 element_base = parameters[4]; | 203 | const u32 element_base = parameters[4]; |
| 196 | const u32 base_instance = parameters[5]; | 204 | const u32 base_instance = parameters[5]; |
| 197 | maxwell3d.regs.vertex_id_base = element_base; | 205 | maxwell3d.regs.vertex_id_base = element_base; |
| 206 | maxwell3d.regs.global_base_vertex_index = element_base; | ||
| 207 | maxwell3d.regs.global_base_instance_index = base_instance; | ||
| 198 | maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | 208 | maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; |
| 199 | maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; | 209 | maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; |
| 200 | maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); | 210 | maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); |
| @@ -205,6 +215,8 @@ private: | |||
| 205 | parameters[3], parameters[1], element_base, base_instance, instance_count); | 215 | parameters[3], parameters[1], element_base, base_instance, instance_count); |
| 206 | 216 | ||
| 207 | maxwell3d.regs.vertex_id_base = 0x0; | 217 | maxwell3d.regs.vertex_id_base = 0x0; |
| 218 | maxwell3d.regs.global_base_vertex_index = 0x0; | ||
| 219 | maxwell3d.regs.global_base_instance_index = 0x0; | ||
| 208 | maxwell3d.engine_state = Maxwell::EngineHint::None; | 220 | maxwell3d.engine_state = Maxwell::EngineHint::None; |
| 209 | maxwell3d.replace_table.clear(); | 221 | maxwell3d.replace_table.clear(); |
| 210 | } | 222 | } |
| @@ -253,7 +265,6 @@ public: | |||
| 253 | return; | 265 | return; |
| 254 | } | 266 | } |
| 255 | 267 | ||
| 256 | maxwell3d.regs.draw.topology.Assign(topology); | ||
| 257 | const u32 padding = parameters[3]; // padding is in words | 268 | const u32 padding = parameters[3]; // padding is in words |
| 258 | 269 | ||
| 259 | // size of each indirect segment | 270 | // size of each indirect segment |
| @@ -335,6 +346,83 @@ private: | |||
| 335 | u32 minimum_limit{1 << 12}; | 346 | u32 minimum_limit{1 << 12}; |
| 336 | }; | 347 | }; |
| 337 | 348 | ||
| 349 | class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl { | ||
| 350 | public: | ||
| 351 | explicit HLE_C713C83D8F63CCF3(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} | ||
| 352 | |||
| 353 | void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | ||
| 354 | maxwell3d.RefreshParameters(); | ||
| 355 | const u32 offset = (parameters[0] & 0x3FFFFFFF) << 2; | ||
| 356 | const u32 address = maxwell3d.regs.shadow_scratch[24]; | ||
| 357 | auto& const_buffer = maxwell3d.regs.const_buffer; | ||
| 358 | const_buffer.size = 0x7000; | ||
| 359 | const_buffer.address_high = (address >> 24) & 0xFF; | ||
| 360 | const_buffer.address_low = address << 8; | ||
| 361 | const_buffer.offset = offset; | ||
| 362 | } | ||
| 363 | }; | ||
| 364 | |||
| 365 | class HLE_D7333D26E0A93EDE final : public HLEMacroImpl { | ||
| 366 | public: | ||
| 367 | explicit HLE_D7333D26E0A93EDE(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} | ||
| 368 | |||
| 369 | void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | ||
| 370 | maxwell3d.RefreshParameters(); | ||
| 371 | const size_t index = parameters[0]; | ||
| 372 | const u32 address = maxwell3d.regs.shadow_scratch[42 + index]; | ||
| 373 | const u32 size = maxwell3d.regs.shadow_scratch[47 + index]; | ||
| 374 | auto& const_buffer = maxwell3d.regs.const_buffer; | ||
| 375 | const_buffer.size = size; | ||
| 376 | const_buffer.address_high = (address >> 24) & 0xFF; | ||
| 377 | const_buffer.address_low = address << 8; | ||
| 378 | } | ||
| 379 | }; | ||
| 380 | |||
| 381 | class HLE_BindShader final : public HLEMacroImpl { | ||
| 382 | public: | ||
| 383 | explicit HLE_BindShader(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} | ||
| 384 | |||
| 385 | void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | ||
| 386 | maxwell3d.RefreshParameters(); | ||
| 387 | auto& regs = maxwell3d.regs; | ||
| 388 | const u32 index = parameters[0]; | ||
| 389 | if ((parameters[1] - regs.shadow_scratch[28 + index]) == 0) { | ||
| 390 | return; | ||
| 391 | } | ||
| 392 | |||
| 393 | regs.pipelines[index & 0xF].offset = parameters[2]; | ||
| 394 | maxwell3d.dirty.flags[VideoCommon::Dirty::Shaders] = true; | ||
| 395 | regs.shadow_scratch[28 + index] = parameters[1]; | ||
| 396 | regs.shadow_scratch[34 + index] = parameters[2]; | ||
| 397 | |||
| 398 | const u32 address = parameters[4]; | ||
| 399 | auto& const_buffer = regs.const_buffer; | ||
| 400 | const_buffer.size = 0x10000; | ||
| 401 | const_buffer.address_high = (address >> 24) & 0xFF; | ||
| 402 | const_buffer.address_low = address << 8; | ||
| 403 | |||
| 404 | const size_t bind_group_id = parameters[3] & 0x7F; | ||
| 405 | auto& bind_group = regs.bind_groups[bind_group_id]; | ||
| 406 | bind_group.raw_config = 0x11; | ||
| 407 | maxwell3d.ProcessCBBind(bind_group_id); | ||
| 408 | } | ||
| 409 | }; | ||
| 410 | |||
| 411 | class HLE_SetRasterBoundingBox final : public HLEMacroImpl { | ||
| 412 | public: | ||
| 413 | explicit HLE_SetRasterBoundingBox(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} | ||
| 414 | |||
| 415 | void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | ||
| 416 | maxwell3d.RefreshParameters(); | ||
| 417 | const u32 raster_mode = parameters[0]; | ||
| 418 | auto& regs = maxwell3d.regs; | ||
| 419 | const u32 raster_enabled = maxwell3d.regs.conservative_raster_enable; | ||
| 420 | const u32 scratch_data = maxwell3d.regs.shadow_scratch[52]; | ||
| 421 | regs.raster_bounding_box.raw = raster_mode & 0xFFFFF00F; | ||
| 422 | regs.raster_bounding_box.pad.Assign(scratch_data & raster_enabled); | ||
| 423 | } | ||
| 424 | }; | ||
| 425 | |||
| 338 | } // Anonymous namespace | 426 | } // Anonymous namespace |
| 339 | 427 | ||
| 340 | HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { | 428 | HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { |
| @@ -368,6 +456,26 @@ HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { | |||
| 368 | [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> { | 456 | [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> { |
| 369 | return std::make_unique<HLE_MultiLayerClear>(maxwell3d); | 457 | return std::make_unique<HLE_MultiLayerClear>(maxwell3d); |
| 370 | })); | 458 | })); |
| 459 | builders.emplace(0xC713C83D8F63CCF3ULL, | ||
| 460 | std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>( | ||
| 461 | [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> { | ||
| 462 | return std::make_unique<HLE_C713C83D8F63CCF3>(maxwell3d); | ||
| 463 | })); | ||
| 464 | builders.emplace(0xD7333D26E0A93EDEULL, | ||
| 465 | std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>( | ||
| 466 | [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> { | ||
| 467 | return std::make_unique<HLE_D7333D26E0A93EDE>(maxwell3d); | ||
| 468 | })); | ||
| 469 | builders.emplace(0xEB29B2A09AA06D38ULL, | ||
| 470 | std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>( | ||
| 471 | [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> { | ||
| 472 | return std::make_unique<HLE_BindShader>(maxwell3d); | ||
| 473 | })); | ||
| 474 | builders.emplace(0xDB1341DBEB4C8AF7ULL, | ||
| 475 | std::function<std::unique_ptr<CachedMacro>(Engines::Maxwell3D&)>( | ||
| 476 | [](Engines::Maxwell3D& maxwell3d) -> std::unique_ptr<CachedMacro> { | ||
| 477 | return std::make_unique<HLE_SetRasterBoundingBox>(maxwell3d); | ||
| 478 | })); | ||
| 371 | } | 479 | } |
| 372 | 480 | ||
| 373 | HLEMacro::~HLEMacro() = default; | 481 | HLEMacro::~HLEMacro() = default; |