diff options
| author | 2022-12-24 20:26:06 -0500 | |
|---|---|---|
| committer | 2022-12-24 20:26:06 -0500 | |
| commit | 3e6850f00bdd541202a8369438bda7988c8001f5 (patch) | |
| tree | 34691ecb826bc402f68a075de07f4f6aaebf8c44 | |
| parent | qt: fix 'Pause' menu item (#9497) (diff) | |
| parent | scratch_buffer: Explicitly defing resize and resize_destructive functions (diff) | |
| download | yuzu-3e6850f00bdd541202a8369438bda7988c8001f5.tar.gz yuzu-3e6850f00bdd541202a8369438bda7988c8001f5.tar.xz yuzu-3e6850f00bdd541202a8369438bda7988c8001f5.zip | |
Merge pull request #9453 from ameerj/scratch-vector
common: Add ScratchBuffer Class
| -rw-r--r-- | src/common/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/common/make_unique_for_overwrite.h | 25 | ||||
| -rw-r--r-- | src/common/scratch_buffer.h | 95 | ||||
| -rw-r--r-- | src/tests/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/tests/common/scratch_buffer.cpp | 199 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 11 | ||||
| -rw-r--r-- | src/video_core/dma_pusher.cpp | 19 | ||||
| -rw-r--r-- | src/video_core/dma_pusher.h | 8 | ||||
| -rw-r--r-- | src/video_core/engines/engine_upload.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/engines/engine_upload.h | 7 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 34 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.h | 8 | ||||
| -rw-r--r-- | src/video_core/host1x/vic.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/host1x/vic.h | 7 |
14 files changed, 370 insertions, 56 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 25b22a281..eb05e46a8 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -78,6 +78,7 @@ add_library(common STATIC | |||
| 78 | logging/types.h | 78 | logging/types.h |
| 79 | lz4_compression.cpp | 79 | lz4_compression.cpp |
| 80 | lz4_compression.h | 80 | lz4_compression.h |
| 81 | make_unique_for_overwrite.h | ||
| 81 | math_util.h | 82 | math_util.h |
| 82 | memory_detect.cpp | 83 | memory_detect.cpp |
| 83 | memory_detect.h | 84 | memory_detect.h |
| @@ -101,6 +102,7 @@ add_library(common STATIC | |||
| 101 | ${CMAKE_CURRENT_BINARY_DIR}/scm_rev.cpp | 102 | ${CMAKE_CURRENT_BINARY_DIR}/scm_rev.cpp |
| 102 | scm_rev.h | 103 | scm_rev.h |
| 103 | scope_exit.h | 104 | scope_exit.h |
| 105 | scratch_buffer.h | ||
| 104 | settings.cpp | 106 | settings.cpp |
| 105 | settings.h | 107 | settings.h |
| 106 | settings_input.cpp | 108 | settings_input.cpp |
diff --git a/src/common/make_unique_for_overwrite.h b/src/common/make_unique_for_overwrite.h new file mode 100644 index 000000000..c7413cf51 --- /dev/null +++ b/src/common/make_unique_for_overwrite.h | |||
| @@ -0,0 +1,25 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <memory> | ||
| 7 | #include <type_traits> | ||
| 8 | |||
| 9 | namespace Common { | ||
| 10 | |||
| 11 | template <class T> | ||
| 12 | requires(!std::is_array_v<T>) std::unique_ptr<T> make_unique_for_overwrite() { | ||
| 13 | return std::unique_ptr<T>(new T); | ||
| 14 | } | ||
| 15 | |||
| 16 | template <class T> | ||
| 17 | requires std::is_unbounded_array_v<T> std::unique_ptr<T> make_unique_for_overwrite(std::size_t n) { | ||
| 18 | return std::unique_ptr<T>(new std::remove_extent_t<T>[n]); | ||
| 19 | } | ||
| 20 | |||
| 21 | template <class T, class... Args> | ||
| 22 | requires std::is_bounded_array_v<T> | ||
| 23 | void make_unique_for_overwrite(Args&&...) = delete; | ||
| 24 | |||
| 25 | } // namespace Common | ||
diff --git a/src/common/scratch_buffer.h b/src/common/scratch_buffer.h new file mode 100644 index 000000000..1245a5086 --- /dev/null +++ b/src/common/scratch_buffer.h | |||
| @@ -0,0 +1,95 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include "common/make_unique_for_overwrite.h" | ||
| 7 | |||
| 8 | namespace Common { | ||
| 9 | |||
| 10 | /** | ||
| 11 | * ScratchBuffer class | ||
| 12 | * This class creates a default-initialized, heap-allocated buffer for cases such as intermediate | ||
| 13 | * buffers that are overwritten entirely by a copy, where value-initializing elements during | ||
| 14 | * allocation or resize is redundant. | ||
| 15 | */ | ||
| 16 | template <typename T> | ||
| 17 | class ScratchBuffer { | ||
| 18 | public: | ||
| 19 | ScratchBuffer() = default; | ||
| 20 | |||
| 21 | explicit ScratchBuffer(size_t initial_capacity) | ||
| 22 | : last_requested_size{initial_capacity}, buffer_capacity{initial_capacity}, | ||
| 23 | buffer{Common::make_unique_for_overwrite<T[]>(initial_capacity)} {} | ||
| 24 | |||
| 25 | ~ScratchBuffer() = default; | ||
| 26 | |||
| 27 | /// This will only grow the buffer's capacity if size is greater than the current capacity. | ||
| 28 | /// The previously held data will remain intact. | ||
| 29 | void resize(size_t size) { | ||
| 30 | if (size > buffer_capacity) { | ||
| 31 | auto new_buffer = Common::make_unique_for_overwrite<T[]>(size); | ||
| 32 | std::move(buffer.get(), buffer.get() + buffer_capacity, new_buffer.get()); | ||
| 33 | buffer = std::move(new_buffer); | ||
| 34 | buffer_capacity = size; | ||
| 35 | } | ||
| 36 | last_requested_size = size; | ||
| 37 | } | ||
| 38 | |||
| 39 | /// This will only grow the buffer's capacity if size is greater than the current capacity. | ||
| 40 | /// The previously held data will be destroyed if a reallocation occurs. | ||
| 41 | void resize_destructive(size_t size) { | ||
| 42 | if (size > buffer_capacity) { | ||
| 43 | buffer_capacity = size; | ||
| 44 | buffer = Common::make_unique_for_overwrite<T[]>(buffer_capacity); | ||
| 45 | } | ||
| 46 | last_requested_size = size; | ||
| 47 | } | ||
| 48 | |||
| 49 | [[nodiscard]] T* data() noexcept { | ||
| 50 | return buffer.get(); | ||
| 51 | } | ||
| 52 | |||
| 53 | [[nodiscard]] const T* data() const noexcept { | ||
| 54 | return buffer.get(); | ||
| 55 | } | ||
| 56 | |||
| 57 | [[nodiscard]] T* begin() noexcept { | ||
| 58 | return data(); | ||
| 59 | } | ||
| 60 | |||
| 61 | [[nodiscard]] const T* begin() const noexcept { | ||
| 62 | return data(); | ||
| 63 | } | ||
| 64 | |||
| 65 | [[nodiscard]] T* end() noexcept { | ||
| 66 | return data() + last_requested_size; | ||
| 67 | } | ||
| 68 | |||
| 69 | [[nodiscard]] const T* end() const noexcept { | ||
| 70 | return data() + last_requested_size; | ||
| 71 | } | ||
| 72 | |||
| 73 | [[nodiscard]] T& operator[](size_t i) { | ||
| 74 | return buffer[i]; | ||
| 75 | } | ||
| 76 | |||
| 77 | [[nodiscard]] const T& operator[](size_t i) const { | ||
| 78 | return buffer[i]; | ||
| 79 | } | ||
| 80 | |||
| 81 | [[nodiscard]] size_t size() const noexcept { | ||
| 82 | return last_requested_size; | ||
| 83 | } | ||
| 84 | |||
| 85 | [[nodiscard]] size_t capacity() const noexcept { | ||
| 86 | return buffer_capacity; | ||
| 87 | } | ||
| 88 | |||
| 89 | private: | ||
| 90 | size_t last_requested_size{}; | ||
| 91 | size_t buffer_capacity{}; | ||
| 92 | std::unique_ptr<T[]> buffer{}; | ||
| 93 | }; | ||
| 94 | |||
| 95 | } // namespace Common | ||
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 348d1edf4..6a4022e45 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt | |||
| @@ -8,6 +8,7 @@ add_executable(tests | |||
| 8 | common/host_memory.cpp | 8 | common/host_memory.cpp |
| 9 | common/param_package.cpp | 9 | common/param_package.cpp |
| 10 | common/ring_buffer.cpp | 10 | common/ring_buffer.cpp |
| 11 | common/scratch_buffer.cpp | ||
| 11 | common/unique_function.cpp | 12 | common/unique_function.cpp |
| 12 | core/core_timing.cpp | 13 | core/core_timing.cpp |
| 13 | core/internal_network/network.cpp | 14 | core/internal_network/network.cpp |
diff --git a/src/tests/common/scratch_buffer.cpp b/src/tests/common/scratch_buffer.cpp new file mode 100644 index 000000000..b602c8d0a --- /dev/null +++ b/src/tests/common/scratch_buffer.cpp | |||
| @@ -0,0 +1,199 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <algorithm> | ||
| 5 | #include <array> | ||
| 6 | #include <span> | ||
| 7 | #include <catch2/catch.hpp> | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "common/scratch_buffer.h" | ||
| 10 | |||
| 11 | namespace Common { | ||
| 12 | |||
| 13 | TEST_CASE("ScratchBuffer: Basic Test", "[common]") { | ||
| 14 | ScratchBuffer<u8> buf; | ||
| 15 | |||
| 16 | REQUIRE(buf.size() == 0U); | ||
| 17 | REQUIRE(buf.capacity() == 0U); | ||
| 18 | |||
| 19 | std::array<u8, 10> payload; | ||
| 20 | payload.fill(66); | ||
| 21 | |||
| 22 | buf.resize(payload.size()); | ||
| 23 | REQUIRE(buf.size() == payload.size()); | ||
| 24 | REQUIRE(buf.capacity() == payload.size()); | ||
| 25 | |||
| 26 | std::memcpy(buf.data(), payload.data(), payload.size()); | ||
| 27 | for (size_t i = 0; i < payload.size(); ++i) { | ||
| 28 | REQUIRE(buf[i] == payload[i]); | ||
| 29 | } | ||
| 30 | } | ||
| 31 | |||
| 32 | TEST_CASE("ScratchBuffer: resize_destructive Grow", "[common]") { | ||
| 33 | std::array<u8, 10> payload; | ||
| 34 | payload.fill(66); | ||
| 35 | |||
| 36 | ScratchBuffer<u8> buf(payload.size()); | ||
| 37 | REQUIRE(buf.size() == payload.size()); | ||
| 38 | REQUIRE(buf.capacity() == payload.size()); | ||
| 39 | |||
| 40 | // Increasing the size should reallocate the buffer | ||
| 41 | buf.resize_destructive(payload.size() * 2); | ||
| 42 | REQUIRE(buf.size() == payload.size() * 2); | ||
| 43 | REQUIRE(buf.capacity() == payload.size() * 2); | ||
| 44 | |||
| 45 | // Since the buffer is not value-initialized, its contents are indeterminate and are not checked | ||
| 46 | } | ||
| 47 | |||
| 48 | TEST_CASE("ScratchBuffer: resize_destructive Shrink", "[common]") { | ||
| 49 | std::array<u8, 10> payload; | ||
| 50 | payload.fill(66); | ||
| 51 | |||
| 52 | ScratchBuffer<u8> buf(payload.size()); | ||
| 53 | REQUIRE(buf.size() == payload.size()); | ||
| 54 | REQUIRE(buf.capacity() == payload.size()); | ||
| 55 | |||
| 56 | std::memcpy(buf.data(), payload.data(), payload.size()); | ||
| 57 | for (size_t i = 0; i < payload.size(); ++i) { | ||
| 58 | REQUIRE(buf[i] == payload[i]); | ||
| 59 | } | ||
| 60 | |||
| 61 | // Decreasing the size should not cause a buffer reallocation | ||
| 62 | // This can be tested by ensuring the buffer capacity and data have not changed. | ||
| 63 | buf.resize_destructive(1U); | ||
| 64 | REQUIRE(buf.size() == 1U); | ||
| 65 | REQUIRE(buf.capacity() == payload.size()); | ||
| 66 | |||
| 67 | for (size_t i = 0; i < payload.size(); ++i) { | ||
| 68 | REQUIRE(buf[i] == payload[i]); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | TEST_CASE("ScratchBuffer: resize Grow u8", "[common]") { | ||
| 73 | std::array<u8, 10> payload; | ||
| 74 | payload.fill(66); | ||
| 75 | |||
| 76 | ScratchBuffer<u8> buf(payload.size()); | ||
| 77 | REQUIRE(buf.size() == payload.size()); | ||
| 78 | REQUIRE(buf.capacity() == payload.size()); | ||
| 79 | |||
| 80 | std::memcpy(buf.data(), payload.data(), payload.size()); | ||
| 81 | for (size_t i = 0; i < payload.size(); ++i) { | ||
| 82 | REQUIRE(buf[i] == payload[i]); | ||
| 83 | } | ||
| 84 | |||
| 85 | // Increasing the size should reallocate the buffer | ||
| 86 | buf.resize(payload.size() * 2); | ||
| 87 | REQUIRE(buf.size() == payload.size() * 2); | ||
| 88 | REQUIRE(buf.capacity() == payload.size() * 2); | ||
| 89 | |||
| 90 | // resize() keeps the previous data intact | ||
| 91 | for (size_t i = 0; i < payload.size(); ++i) { | ||
| 92 | REQUIRE(buf[i] == payload[i]); | ||
| 93 | } | ||
| 94 | } | ||
| 95 | |||
| 96 | TEST_CASE("ScratchBuffer: resize Grow u64", "[common]") { | ||
| 97 | std::array<u64, 10> payload; | ||
| 98 | payload.fill(6666); | ||
| 99 | |||
| 100 | ScratchBuffer<u64> buf(payload.size()); | ||
| 101 | REQUIRE(buf.size() == payload.size()); | ||
| 102 | REQUIRE(buf.capacity() == payload.size()); | ||
| 103 | |||
| 104 | std::memcpy(buf.data(), payload.data(), payload.size() * sizeof(u64)); | ||
| 105 | for (size_t i = 0; i < payload.size(); ++i) { | ||
| 106 | REQUIRE(buf[i] == payload[i]); | ||
| 107 | } | ||
| 108 | |||
| 109 | // Increasing the size should reallocate the buffer | ||
| 110 | buf.resize(payload.size() * 2); | ||
| 111 | REQUIRE(buf.size() == payload.size() * 2); | ||
| 112 | REQUIRE(buf.capacity() == payload.size() * 2); | ||
| 113 | |||
| 114 | // resize() keeps the previous data intact | ||
| 115 | for (size_t i = 0; i < payload.size(); ++i) { | ||
| 116 | REQUIRE(buf[i] == payload[i]); | ||
| 117 | } | ||
| 118 | } | ||
| 119 | |||
| 120 | TEST_CASE("ScratchBuffer: resize Shrink", "[common]") { | ||
| 121 | std::array<u8, 10> payload; | ||
| 122 | payload.fill(66); | ||
| 123 | |||
| 124 | ScratchBuffer<u8> buf(payload.size()); | ||
| 125 | REQUIRE(buf.size() == payload.size()); | ||
| 126 | REQUIRE(buf.capacity() == payload.size()); | ||
| 127 | |||
| 128 | std::memcpy(buf.data(), payload.data(), payload.size()); | ||
| 129 | for (size_t i = 0; i < payload.size(); ++i) { | ||
| 130 | REQUIRE(buf[i] == payload[i]); | ||
| 131 | } | ||
| 132 | |||
| 133 | // Decreasing the size should not cause a buffer reallocation | ||
| 134 | // This can be tested by ensuring the buffer capacity and data have not changed. | ||
| 135 | buf.resize(1U); | ||
| 136 | REQUIRE(buf.size() == 1U); | ||
| 137 | REQUIRE(buf.capacity() == payload.size()); | ||
| 138 | |||
| 139 | for (size_t i = 0; i < payload.size(); ++i) { | ||
| 140 | REQUIRE(buf[i] == payload[i]); | ||
| 141 | } | ||
| 142 | } | ||
| 143 | |||
| 144 | TEST_CASE("ScratchBuffer: Span Size", "[common]") { | ||
| 145 | std::array<u8, 10> payload; | ||
| 146 | payload.fill(66); | ||
| 147 | |||
| 148 | ScratchBuffer<u8> buf(payload.size()); | ||
| 149 | REQUIRE(buf.size() == payload.size()); | ||
| 150 | REQUIRE(buf.capacity() == payload.size()); | ||
| 151 | |||
| 152 | std::memcpy(buf.data(), payload.data(), payload.size()); | ||
| 153 | for (size_t i = 0; i < payload.size(); ++i) { | ||
| 154 | REQUIRE(buf[i] == payload[i]); | ||
| 155 | } | ||
| 156 | |||
| 157 | buf.resize(3U); | ||
| 158 | REQUIRE(buf.size() == 3U); | ||
| 159 | REQUIRE(buf.capacity() == payload.size()); | ||
| 160 | |||
| 161 | const auto buf_span = std::span<u8>(buf); | ||
| 162 | // The span size is the last requested size of the buffer, not its capacity | ||
| 163 | REQUIRE(buf_span.size() == buf.size()); | ||
| 164 | |||
| 165 | for (size_t i = 0; i < buf_span.size(); ++i) { | ||
| 166 | REQUIRE(buf_span[i] == buf[i]); | ||
| 167 | REQUIRE(buf_span[i] == payload[i]); | ||
| 168 | } | ||
| 169 | } | ||
| 170 | |||
| 171 | TEST_CASE("ScratchBuffer: Span Writes", "[common]") { | ||
| 172 | std::array<u8, 10> payload; | ||
| 173 | payload.fill(66); | ||
| 174 | |||
| 175 | ScratchBuffer<u8> buf(payload.size()); | ||
| 176 | REQUIRE(buf.size() == payload.size()); | ||
| 177 | REQUIRE(buf.capacity() == payload.size()); | ||
| 178 | |||
| 179 | std::memcpy(buf.data(), payload.data(), payload.size()); | ||
| 180 | for (size_t i = 0; i < payload.size(); ++i) { | ||
| 181 | REQUIRE(buf[i] == payload[i]); | ||
| 182 | } | ||
| 183 | |||
| 184 | buf.resize(3U); | ||
| 185 | REQUIRE(buf.size() == 3U); | ||
| 186 | REQUIRE(buf.capacity() == payload.size()); | ||
| 187 | |||
| 188 | const auto buf_span = std::span<u8>(buf); | ||
| 189 | REQUIRE(buf_span.size() == buf.size()); | ||
| 190 | |||
| 191 | for (size_t i = 0; i < buf_span.size(); ++i) { | ||
| 192 | const auto new_value = static_cast<u8>(i + 1U); | ||
| 193 | // Writes to a span of the scratch buffer will propagate to the buffer itself | ||
| 194 | buf_span[i] = new_value; | ||
| 195 | REQUIRE(buf[i] == new_value); | ||
| 196 | } | ||
| 197 | } | ||
| 198 | |||
| 199 | } // namespace Common | ||
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 502b4d90a..6c8d98946 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "common/lru_cache.h" | 20 | #include "common/lru_cache.h" |
| 21 | #include "common/microprofile.h" | 21 | #include "common/microprofile.h" |
| 22 | #include "common/polyfill_ranges.h" | 22 | #include "common/polyfill_ranges.h" |
| 23 | #include "common/scratch_buffer.h" | ||
| 23 | #include "common/settings.h" | 24 | #include "common/settings.h" |
| 24 | #include "core/memory.h" | 25 | #include "core/memory.h" |
| 25 | #include "video_core/buffer_cache/buffer_base.h" | 26 | #include "video_core/buffer_cache/buffer_base.h" |
| @@ -422,8 +423,7 @@ private: | |||
| 422 | IntervalSet common_ranges; | 423 | IntervalSet common_ranges; |
| 423 | std::deque<IntervalSet> committed_ranges; | 424 | std::deque<IntervalSet> committed_ranges; |
| 424 | 425 | ||
| 425 | size_t immediate_buffer_capacity = 0; | 426 | Common::ScratchBuffer<u8> immediate_buffer_alloc; |
| 426 | std::unique_ptr<u8[]> immediate_buffer_alloc; | ||
| 427 | 427 | ||
| 428 | struct LRUItemParams { | 428 | struct LRUItemParams { |
| 429 | using ObjectType = BufferId; | 429 | using ObjectType = BufferId; |
| @@ -1926,11 +1926,8 @@ std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size | |||
| 1926 | 1926 | ||
| 1927 | template <class P> | 1927 | template <class P> |
| 1928 | std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) { | 1928 | std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) { |
| 1929 | if (wanted_capacity > immediate_buffer_capacity) { | 1929 | immediate_buffer_alloc.resize_destructive(wanted_capacity); |
| 1930 | immediate_buffer_capacity = wanted_capacity; | 1930 | return std::span<u8>(immediate_buffer_alloc.data(), wanted_capacity); |
| 1931 | immediate_buffer_alloc = std::make_unique<u8[]>(wanted_capacity); | ||
| 1932 | } | ||
| 1933 | return std::span<u8>(immediate_buffer_alloc.get(), wanted_capacity); | ||
| 1934 | } | 1931 | } |
| 1935 | 1932 | ||
| 1936 | template <class P> | 1933 | template <class P> |
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 9835e3ac1..322de2606 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -56,7 +56,7 @@ bool DmaPusher::Step() { | |||
| 56 | 56 | ||
| 57 | if (command_list.prefetch_command_list.size()) { | 57 | if (command_list.prefetch_command_list.size()) { |
| 58 | // Prefetched command list from nvdrv, used for things like synchronization | 58 | // Prefetched command list from nvdrv, used for things like synchronization |
| 59 | command_headers = std::move(command_list.prefetch_command_list); | 59 | ProcessCommands(command_list.prefetch_command_list); |
| 60 | dma_pushbuffer.pop(); | 60 | dma_pushbuffer.pop(); |
| 61 | } else { | 61 | } else { |
| 62 | const CommandListHeader command_list_header{ | 62 | const CommandListHeader command_list_header{ |
| @@ -74,7 +74,7 @@ bool DmaPusher::Step() { | |||
| 74 | } | 74 | } |
| 75 | 75 | ||
| 76 | // Push buffer non-empty, read a word | 76 | // Push buffer non-empty, read a word |
| 77 | command_headers.resize(command_list_header.size); | 77 | command_headers.resize_destructive(command_list_header.size); |
| 78 | if (Settings::IsGPULevelHigh()) { | 78 | if (Settings::IsGPULevelHigh()) { |
| 79 | memory_manager.ReadBlock(dma_get, command_headers.data(), | 79 | memory_manager.ReadBlock(dma_get, command_headers.data(), |
| 80 | command_list_header.size * sizeof(u32)); | 80 | command_list_header.size * sizeof(u32)); |
| @@ -82,16 +82,21 @@ bool DmaPusher::Step() { | |||
| 82 | memory_manager.ReadBlockUnsafe(dma_get, command_headers.data(), | 82 | memory_manager.ReadBlockUnsafe(dma_get, command_headers.data(), |
| 83 | command_list_header.size * sizeof(u32)); | 83 | command_list_header.size * sizeof(u32)); |
| 84 | } | 84 | } |
| 85 | ProcessCommands(command_headers); | ||
| 85 | } | 86 | } |
| 86 | for (std::size_t index = 0; index < command_headers.size();) { | 87 | |
| 87 | const CommandHeader& command_header = command_headers[index]; | 88 | return true; |
| 89 | } | ||
| 90 | |||
| 91 | void DmaPusher::ProcessCommands(std::span<const CommandHeader> commands) { | ||
| 92 | for (std::size_t index = 0; index < commands.size();) { | ||
| 93 | const CommandHeader& command_header = commands[index]; | ||
| 88 | 94 | ||
| 89 | if (dma_state.method_count) { | 95 | if (dma_state.method_count) { |
| 90 | // Data word of methods command | 96 | // Data word of methods command |
| 91 | if (dma_state.non_incrementing) { | 97 | if (dma_state.non_incrementing) { |
| 92 | const u32 max_write = static_cast<u32>( | 98 | const u32 max_write = static_cast<u32>( |
| 93 | std::min<std::size_t>(index + dma_state.method_count, command_headers.size()) - | 99 | std::min<std::size_t>(index + dma_state.method_count, commands.size()) - index); |
| 94 | index); | ||
| 95 | CallMultiMethod(&command_header.argument, max_write); | 100 | CallMultiMethod(&command_header.argument, max_write); |
| 96 | dma_state.method_count -= max_write; | 101 | dma_state.method_count -= max_write; |
| 97 | dma_state.is_last_call = true; | 102 | dma_state.is_last_call = true; |
| @@ -142,8 +147,6 @@ bool DmaPusher::Step() { | |||
| 142 | } | 147 | } |
| 143 | index++; | 148 | index++; |
| 144 | } | 149 | } |
| 145 | |||
| 146 | return true; | ||
| 147 | } | 150 | } |
| 148 | 151 | ||
| 149 | void DmaPusher::SetState(const CommandHeader& command_header) { | 152 | void DmaPusher::SetState(const CommandHeader& command_header) { |
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 938f0f11c..6f00de937 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h | |||
| @@ -4,11 +4,13 @@ | |||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <span> | ||
| 7 | #include <vector> | 8 | #include <vector> |
| 8 | #include <queue> | 9 | #include <queue> |
| 9 | 10 | ||
| 10 | #include "common/bit_field.h" | 11 | #include "common/bit_field.h" |
| 11 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "common/scratch_buffer.h" | ||
| 12 | #include "video_core/engines/engine_interface.h" | 14 | #include "video_core/engines/engine_interface.h" |
| 13 | #include "video_core/engines/puller.h" | 15 | #include "video_core/engines/puller.h" |
| 14 | 16 | ||
| @@ -136,13 +138,15 @@ private: | |||
| 136 | static constexpr u32 non_puller_methods = 0x40; | 138 | static constexpr u32 non_puller_methods = 0x40; |
| 137 | static constexpr u32 max_subchannels = 8; | 139 | static constexpr u32 max_subchannels = 8; |
| 138 | bool Step(); | 140 | bool Step(); |
| 141 | void ProcessCommands(std::span<const CommandHeader> commands); | ||
| 139 | 142 | ||
| 140 | void SetState(const CommandHeader& command_header); | 143 | void SetState(const CommandHeader& command_header); |
| 141 | 144 | ||
| 142 | void CallMethod(u32 argument) const; | 145 | void CallMethod(u32 argument) const; |
| 143 | void CallMultiMethod(const u32* base_start, u32 num_methods) const; | 146 | void CallMultiMethod(const u32* base_start, u32 num_methods) const; |
| 144 | 147 | ||
| 145 | std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once | 148 | Common::ScratchBuffer<CommandHeader> |
| 149 | command_headers; ///< Buffer for list of commands fetched at once | ||
| 146 | 150 | ||
| 147 | std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed | 151 | std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed |
| 148 | std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer | 152 | std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer |
| @@ -159,7 +163,7 @@ private: | |||
| 159 | DmaState dma_state{}; | 163 | DmaState dma_state{}; |
| 160 | bool dma_increment_once{}; | 164 | bool dma_increment_once{}; |
| 161 | 165 | ||
| 162 | bool ib_enable{true}; ///< IB mode enabled | 166 | const bool ib_enable{true}; ///< IB mode enabled |
| 163 | 167 | ||
| 164 | std::array<Engines::EngineInterface*, max_subchannels> subchannels{}; | 168 | std::array<Engines::EngineInterface*, max_subchannels> subchannels{}; |
| 165 | 169 | ||
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index e4f8331ab..cea1dd8b0 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp | |||
| @@ -24,7 +24,7 @@ void State::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { | |||
| 24 | void State::ProcessExec(const bool is_linear_) { | 24 | void State::ProcessExec(const bool is_linear_) { |
| 25 | write_offset = 0; | 25 | write_offset = 0; |
| 26 | copy_size = regs.line_length_in * regs.line_count; | 26 | copy_size = regs.line_length_in * regs.line_count; |
| 27 | inner_buffer.resize(copy_size); | 27 | inner_buffer.resize_destructive(copy_size); |
| 28 | is_linear = is_linear_; | 28 | is_linear = is_linear_; |
| 29 | } | 29 | } |
| 30 | 30 | ||
| @@ -70,7 +70,7 @@ void State::ProcessData(std::span<const u8> read_buffer) { | |||
| 70 | const std::size_t dst_size = Tegra::Texture::CalculateSize( | 70 | const std::size_t dst_size = Tegra::Texture::CalculateSize( |
| 71 | true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, | 71 | true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, |
| 72 | regs.dest.BlockHeight(), regs.dest.BlockDepth()); | 72 | regs.dest.BlockHeight(), regs.dest.BlockDepth()); |
| 73 | tmp_buffer.resize(dst_size); | 73 | tmp_buffer.resize_destructive(dst_size); |
| 74 | memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); | 74 | memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); |
| 75 | Tegra::Texture::SwizzleSubrect(tmp_buffer, read_buffer, bytes_per_pixel, width, | 75 | Tegra::Texture::SwizzleSubrect(tmp_buffer, read_buffer, bytes_per_pixel, width, |
| 76 | regs.dest.height, regs.dest.depth, x_offset, regs.dest.y, | 76 | regs.dest.height, regs.dest.depth, x_offset, regs.dest.y, |
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h index 94fafd9dc..7242d2529 100644 --- a/src/video_core/engines/engine_upload.h +++ b/src/video_core/engines/engine_upload.h | |||
| @@ -4,9 +4,10 @@ | |||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <span> | 6 | #include <span> |
| 7 | #include <vector> | 7 | |
| 8 | #include "common/bit_field.h" | 8 | #include "common/bit_field.h" |
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "common/scratch_buffer.h" | ||
| 10 | 11 | ||
| 11 | namespace Tegra { | 12 | namespace Tegra { |
| 12 | class MemoryManager; | 13 | class MemoryManager; |
| @@ -73,8 +74,8 @@ private: | |||
| 73 | 74 | ||
| 74 | u32 write_offset = 0; | 75 | u32 write_offset = 0; |
| 75 | u32 copy_size = 0; | 76 | u32 copy_size = 0; |
| 76 | std::vector<u8> inner_buffer; | 77 | Common::ScratchBuffer<u8> inner_buffer; |
| 77 | std::vector<u8> tmp_buffer; | 78 | Common::ScratchBuffer<u8> tmp_buffer; |
| 78 | bool is_linear = false; | 79 | bool is_linear = false; |
| 79 | Registers& regs; | 80 | Registers& regs; |
| 80 | MemoryManager& memory_manager; | 81 | MemoryManager& memory_manager; |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index a189e60ae..f73d7bf0f 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -184,12 +184,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 184 | const size_t src_size = | 184 | const size_t src_size = |
| 185 | CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); | 185 | CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); |
| 186 | 186 | ||
| 187 | if (read_buffer.size() < src_size) { | 187 | read_buffer.resize_destructive(src_size); |
| 188 | read_buffer.resize(src_size); | 188 | write_buffer.resize_destructive(dst_size); |
| 189 | } | ||
| 190 | if (write_buffer.size() < dst_size) { | ||
| 191 | write_buffer.resize(dst_size); | ||
| 192 | } | ||
| 193 | 189 | ||
| 194 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); | 190 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); |
| 195 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); | 191 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); |
| @@ -235,12 +231,8 @@ void MaxwellDMA::CopyPitchToBlockLinear() { | |||
| 235 | CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); | 231 | CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); |
| 236 | const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count; | 232 | const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count; |
| 237 | 233 | ||
| 238 | if (read_buffer.size() < src_size) { | 234 | read_buffer.resize_destructive(src_size); |
| 239 | read_buffer.resize(src_size); | 235 | write_buffer.resize_destructive(dst_size); |
| 240 | } | ||
| 241 | if (write_buffer.size() < dst_size) { | ||
| 242 | write_buffer.resize(dst_size); | ||
| 243 | } | ||
| 244 | 236 | ||
| 245 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); | 237 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); |
| 246 | if (Settings::IsGPULevelExtreme()) { | 238 | if (Settings::IsGPULevelExtreme()) { |
| @@ -269,12 +261,8 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() { | |||
| 269 | pos_x = pos_x % x_in_gob; | 261 | pos_x = pos_x % x_in_gob; |
| 270 | pos_y = pos_y % 8; | 262 | pos_y = pos_y % 8; |
| 271 | 263 | ||
| 272 | if (read_buffer.size() < src_size) { | 264 | read_buffer.resize_destructive(src_size); |
| 273 | read_buffer.resize(src_size); | 265 | write_buffer.resize_destructive(dst_size); |
| 274 | } | ||
| 275 | if (write_buffer.size() < dst_size) { | ||
| 276 | write_buffer.resize(dst_size); | ||
| 277 | } | ||
| 278 | 266 | ||
| 279 | if (Settings::IsGPULevelExtreme()) { | 267 | if (Settings::IsGPULevelExtreme()) { |
| 280 | memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), src_size); | 268 | memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), src_size); |
| @@ -333,14 +321,10 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() { | |||
| 333 | const u32 pitch = x_elements * bytes_per_pixel; | 321 | const u32 pitch = x_elements * bytes_per_pixel; |
| 334 | const size_t mid_buffer_size = pitch * regs.line_count; | 322 | const size_t mid_buffer_size = pitch * regs.line_count; |
| 335 | 323 | ||
| 336 | if (read_buffer.size() < src_size) { | 324 | read_buffer.resize_destructive(src_size); |
| 337 | read_buffer.resize(src_size); | 325 | write_buffer.resize_destructive(dst_size); |
| 338 | } | ||
| 339 | if (write_buffer.size() < dst_size) { | ||
| 340 | write_buffer.resize(dst_size); | ||
| 341 | } | ||
| 342 | 326 | ||
| 343 | intermediate_buffer.resize(mid_buffer_size); | 327 | intermediate_buffer.resize_destructive(mid_buffer_size); |
| 344 | 328 | ||
| 345 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); | 329 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); |
| 346 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); | 330 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); |
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index d40d3d302..c88191a61 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h | |||
| @@ -6,8 +6,10 @@ | |||
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <cstddef> | 7 | #include <cstddef> |
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | |||
| 9 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 10 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "common/scratch_buffer.h" | ||
| 11 | #include "video_core/engines/engine_interface.h" | 13 | #include "video_core/engines/engine_interface.h" |
| 12 | 14 | ||
| 13 | namespace Core { | 15 | namespace Core { |
| @@ -234,9 +236,9 @@ private: | |||
| 234 | MemoryManager& memory_manager; | 236 | MemoryManager& memory_manager; |
| 235 | VideoCore::RasterizerInterface* rasterizer = nullptr; | 237 | VideoCore::RasterizerInterface* rasterizer = nullptr; |
| 236 | 238 | ||
| 237 | std::vector<u8> read_buffer; | 239 | Common::ScratchBuffer<u8> read_buffer; |
| 238 | std::vector<u8> write_buffer; | 240 | Common::ScratchBuffer<u8> write_buffer; |
| 239 | std::vector<u8> intermediate_buffer; | 241 | Common::ScratchBuffer<u8> intermediate_buffer; |
| 240 | 242 | ||
| 241 | static constexpr std::size_t NUM_REGS = 0x800; | 243 | static constexpr std::size_t NUM_REGS = 0x800; |
| 242 | struct Regs { | 244 | struct Regs { |
diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp index ac0b7d20e..36a04e4e0 100644 --- a/src/video_core/host1x/vic.cpp +++ b/src/video_core/host1x/vic.cpp | |||
| @@ -155,7 +155,7 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) { | |||
| 155 | // swizzle pitch linear to block linear | 155 | // swizzle pitch linear to block linear |
| 156 | const u32 block_height = static_cast<u32>(config.block_linear_height_log2); | 156 | const u32 block_height = static_cast<u32>(config.block_linear_height_log2); |
| 157 | const auto size = Texture::CalculateSize(true, 4, width, height, 1, block_height, 0); | 157 | const auto size = Texture::CalculateSize(true, 4, width, height, 1, block_height, 0); |
| 158 | luma_buffer.resize(size); | 158 | luma_buffer.resize_destructive(size); |
| 159 | std::span<const u8> frame_buff(converted_frame_buf_addr, 4 * width * height); | 159 | std::span<const u8> frame_buff(converted_frame_buf_addr, 4 * width * height); |
| 160 | Texture::SwizzleSubrect(luma_buffer, frame_buff, 4, width, height, 1, 0, 0, width, height, | 160 | Texture::SwizzleSubrect(luma_buffer, frame_buff, 4, width, height, 1, 0, 0, width, height, |
| 161 | block_height, 0, width * 4); | 161 | block_height, 0, width * 4); |
| @@ -181,8 +181,8 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) { | |||
| 181 | 181 | ||
| 182 | const auto stride = static_cast<size_t>(frame->linesize[0]); | 182 | const auto stride = static_cast<size_t>(frame->linesize[0]); |
| 183 | 183 | ||
| 184 | luma_buffer.resize(aligned_width * surface_height); | 184 | luma_buffer.resize_destructive(aligned_width * surface_height); |
| 185 | chroma_buffer.resize(aligned_width * surface_height / 2); | 185 | chroma_buffer.resize_destructive(aligned_width * surface_height / 2); |
| 186 | 186 | ||
| 187 | // Populate luma buffer | 187 | // Populate luma buffer |
| 188 | const u8* luma_src = frame->data[0]; | 188 | const u8* luma_src = frame->data[0]; |
diff --git a/src/video_core/host1x/vic.h b/src/video_core/host1x/vic.h index 2b78786e8..3d9753047 100644 --- a/src/video_core/host1x/vic.h +++ b/src/video_core/host1x/vic.h | |||
| @@ -4,8 +4,9 @@ | |||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <memory> | 6 | #include <memory> |
| 7 | #include <vector> | 7 | |
| 8 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 9 | #include "common/scratch_buffer.h" | ||
| 9 | 10 | ||
| 10 | struct SwsContext; | 11 | struct SwsContext; |
| 11 | 12 | ||
| @@ -49,8 +50,8 @@ private: | |||
| 49 | /// size does not change during a stream | 50 | /// size does not change during a stream |
| 50 | using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>; | 51 | using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>; |
| 51 | AVMallocPtr converted_frame_buffer; | 52 | AVMallocPtr converted_frame_buffer; |
| 52 | std::vector<u8> luma_buffer; | 53 | Common::ScratchBuffer<u8> luma_buffer; |
| 53 | std::vector<u8> chroma_buffer; | 54 | Common::ScratchBuffer<u8> chroma_buffer; |
| 54 | 55 | ||
| 55 | GPUVAddr config_struct_address{}; | 56 | GPUVAddr config_struct_address{}; |
| 56 | GPUVAddr output_surface_luma_address{}; | 57 | GPUVAddr output_surface_luma_address{}; |