diff options
| author | 2022-08-14 02:36:36 -0700 | |
|---|---|---|
| committer | 2022-10-06 21:00:53 +0200 | |
| commit | f5fd6b5c8674fcf64a3e70809ee0a34d3a95beb6 (patch) | |
| tree | 5156a04816d6556b8babe7d69301f18098b8dd1d /src | |
| parent | Maxwell3D: Add small_index_2 (diff) | |
| download | yuzu-f5fd6b5c8674fcf64a3e70809ee0a34d3a95beb6.tar.gz yuzu-f5fd6b5c8674fcf64a3e70809ee0a34d3a95beb6.tar.xz yuzu-f5fd6b5c8674fcf64a3e70809ee0a34d3a95beb6.zip | |
DMA & InlineToMemory Engines Rework.
Diffstat (limited to 'src')
21 files changed, 323 insertions, 242 deletions
diff --git a/src/common/algorithm.h b/src/common/algorithm.h index 9ddfd637b..055dca142 100644 --- a/src/common/algorithm.h +++ b/src/common/algorithm.h | |||
| @@ -24,4 +24,12 @@ template <class ForwardIt, class T, class Compare = std::less<>> | |||
| 24 | return first != last && !comp(value, *first) ? first : last; | 24 | return first != last && !comp(value, *first) ? first : last; |
| 25 | } | 25 | } |
| 26 | 26 | ||
| 27 | template <typename T, typename Func, typename... Args> | ||
| 28 | T FoldRight(T initial_value, Func&& func, Args&&... args) { | ||
| 29 | T value{initial_value}; | ||
| 30 | const auto high_func = [&value, &func]<typename T>(T x) { value = func(value, x); }; | ||
| 31 | (std::invoke(high_func, std::forward<Args>(args)), ...); | ||
| 32 | return value; | ||
| 33 | } | ||
| 34 | |||
| 27 | } // namespace Common | 35 | } // namespace Common |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index e55cac0d6..359c11d6f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -126,7 +126,7 @@ public: | |||
| 126 | 126 | ||
| 127 | void DownloadMemory(VAddr cpu_addr, u64 size); | 127 | void DownloadMemory(VAddr cpu_addr, u64 size); |
| 128 | 128 | ||
| 129 | bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<u8> inlined_buffer); | 129 | bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); |
| 130 | 130 | ||
| 131 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); | 131 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); |
| 132 | 132 | ||
| @@ -1685,7 +1685,7 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, | |||
| 1685 | 1685 | ||
| 1686 | template <class P> | 1686 | template <class P> |
| 1687 | bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, | 1687 | bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, |
| 1688 | std::span<u8> inlined_buffer) { | 1688 | std::span<const u8> inlined_buffer) { |
| 1689 | const bool is_dirty = IsRegionRegistered(dest_address, copy_size); | 1689 | const bool is_dirty = IsRegionRegistered(dest_address, copy_size); |
| 1690 | if (!is_dirty) { | 1690 | if (!is_dirty) { |
| 1691 | return false; | 1691 | return false; |
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index 6ff5b1eca..a34819234 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | #include <cstring> | 4 | #include <cstring> |
| 5 | 5 | ||
| 6 | #include "common/algorithm.h" | ||
| 6 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 7 | #include "video_core/engines/engine_upload.h" | 8 | #include "video_core/engines/engine_upload.h" |
| 8 | #include "video_core/memory_manager.h" | 9 | #include "video_core/memory_manager.h" |
| @@ -34,21 +35,48 @@ void State::ProcessData(const u32 data, const bool is_last_call) { | |||
| 34 | if (!is_last_call) { | 35 | if (!is_last_call) { |
| 35 | return; | 36 | return; |
| 36 | } | 37 | } |
| 38 | ProcessData(inner_buffer); | ||
| 39 | } | ||
| 40 | |||
| 41 | void State::ProcessData(const u32* data, size_t num_data) { | ||
| 42 | std::span<const u8> read_buffer(reinterpret_cast<const u8*>(data), num_data * sizeof(u32)); | ||
| 43 | ProcessData(read_buffer); | ||
| 44 | } | ||
| 45 | |||
| 46 | void State::ProcessData(std::span<const u8> read_buffer) { | ||
| 37 | const GPUVAddr address{regs.dest.Address()}; | 47 | const GPUVAddr address{regs.dest.Address()}; |
| 38 | if (is_linear) { | 48 | if (is_linear) { |
| 39 | rasterizer->AccelerateInlineToMemory(address, copy_size, inner_buffer); | 49 | if (regs.line_count == 1) { |
| 50 | rasterizer->AccelerateInlineToMemory(address, copy_size, read_buffer); | ||
| 51 | } else { | ||
| 52 | for (u32 line = 0; line < regs.line_count; ++line) { | ||
| 53 | const GPUVAddr dest_line = address + static_cast<size_t>(line) * regs.dest.pitch; | ||
| 54 | memory_manager.WriteBlockUnsafe( | ||
| 55 | dest_line, read_buffer.data() + static_cast<size_t>(line) * regs.line_length_in, | ||
| 56 | regs.line_length_in); | ||
| 57 | } | ||
| 58 | memory_manager.InvalidateRegion(address, regs.dest.pitch * regs.line_count); | ||
| 59 | } | ||
| 40 | } else { | 60 | } else { |
| 41 | UNIMPLEMENTED_IF(regs.dest.z != 0); | 61 | u32 width = regs.dest.width; |
| 42 | UNIMPLEMENTED_IF(regs.dest.depth != 1); | 62 | u32 x_elements = regs.line_length_in; |
| 43 | UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 0); | 63 | u32 x_offset = regs.dest.x; |
| 44 | UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 0); | 64 | const u32 bpp_shift = Common::FoldRight( |
| 65 | 4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); }, | ||
| 66 | width, x_elements, x_offset, static_cast<u32>(address)); | ||
| 67 | width >>= bpp_shift; | ||
| 68 | x_elements >>= bpp_shift; | ||
| 69 | x_offset >>= bpp_shift; | ||
| 70 | const u32 bytes_per_pixel = 1U << bpp_shift; | ||
| 45 | const std::size_t dst_size = Tegra::Texture::CalculateSize( | 71 | const std::size_t dst_size = Tegra::Texture::CalculateSize( |
| 46 | true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 0); | 72 | true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, |
| 73 | regs.dest.BlockHeight(), regs.dest.BlockDepth()); | ||
| 47 | tmp_buffer.resize(dst_size); | 74 | tmp_buffer.resize(dst_size); |
| 48 | memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); | 75 | memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); |
| 49 | Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y, | 76 | Tegra::Texture::SwizzleSubrect(tmp_buffer, read_buffer, bytes_per_pixel, width, |
| 50 | regs.dest.BlockHeight(), copy_size, inner_buffer.data(), | 77 | regs.dest.height, regs.dest.depth, x_offset, regs.dest.y, |
| 51 | tmp_buffer.data()); | 78 | x_elements, regs.line_count, regs.dest.BlockHeight(), |
| 79 | regs.dest.BlockDepth(), regs.line_length_in); | ||
| 52 | memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); | 80 | memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); |
| 53 | } | 81 | } |
| 54 | } | 82 | } |
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h index 94ff3314a..f08f6e36a 100644 --- a/src/video_core/engines/engine_upload.h +++ b/src/video_core/engines/engine_upload.h | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <span> | ||
| 6 | #include <vector> | 7 | #include <vector> |
| 7 | #include "common/bit_field.h" | 8 | #include "common/bit_field.h" |
| 8 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| @@ -33,7 +34,7 @@ struct Registers { | |||
| 33 | u32 width; | 34 | u32 width; |
| 34 | u32 height; | 35 | u32 height; |
| 35 | u32 depth; | 36 | u32 depth; |
| 36 | u32 z; | 37 | u32 layer; |
| 37 | u32 x; | 38 | u32 x; |
| 38 | u32 y; | 39 | u32 y; |
| 39 | 40 | ||
| @@ -62,11 +63,14 @@ public: | |||
| 62 | 63 | ||
| 63 | void ProcessExec(bool is_linear_); | 64 | void ProcessExec(bool is_linear_); |
| 64 | void ProcessData(u32 data, bool is_last_call); | 65 | void ProcessData(u32 data, bool is_last_call); |
| 66 | void ProcessData(const u32* data, size_t num_data); | ||
| 65 | 67 | ||
| 66 | /// Binds a rasterizer to this engine. | 68 | /// Binds a rasterizer to this engine. |
| 67 | void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); | 69 | void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); |
| 68 | 70 | ||
| 69 | private: | 71 | private: |
| 72 | void ProcessData(std::span<const u8> read_buffer); | ||
| 73 | |||
| 70 | u32 write_offset = 0; | 74 | u32 write_offset = 0; |
| 71 | u32 copy_size = 0; | 75 | u32 copy_size = 0; |
| 72 | std::vector<u8> inner_buffer; | 76 | std::vector<u8> inner_buffer; |
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 5db254d94..7c50bdbe0 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -36,8 +36,6 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal | |||
| 36 | } | 36 | } |
| 37 | case KEPLER_COMPUTE_REG_INDEX(data_upload): { | 37 | case KEPLER_COMPUTE_REG_INDEX(data_upload): { |
| 38 | upload_state.ProcessData(method_argument, is_last_call); | 38 | upload_state.ProcessData(method_argument, is_last_call); |
| 39 | if (is_last_call) { | ||
| 40 | } | ||
| 41 | break; | 39 | break; |
| 42 | } | 40 | } |
| 43 | case KEPLER_COMPUTE_REG_INDEX(launch): | 41 | case KEPLER_COMPUTE_REG_INDEX(launch): |
| @@ -50,8 +48,15 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal | |||
| 50 | 48 | ||
| 51 | void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | 49 | void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount, |
| 52 | u32 methods_pending) { | 50 | u32 methods_pending) { |
| 53 | for (std::size_t i = 0; i < amount; i++) { | 51 | switch (method) { |
| 54 | CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); | 52 | case KEPLER_COMPUTE_REG_INDEX(data_upload): |
| 53 | upload_state.ProcessData(base_start, static_cast<size_t>(amount)); | ||
| 54 | return; | ||
| 55 | default: | ||
| 56 | for (std::size_t i = 0; i < amount; i++) { | ||
| 57 | CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); | ||
| 58 | } | ||
| 59 | break; | ||
| 55 | } | 60 | } |
| 56 | } | 61 | } |
| 57 | 62 | ||
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index e2b029542..a3fbab1e5 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -33,8 +33,6 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call | |||
| 33 | } | 33 | } |
| 34 | case KEPLERMEMORY_REG_INDEX(data): { | 34 | case KEPLERMEMORY_REG_INDEX(data): { |
| 35 | upload_state.ProcessData(method_argument, is_last_call); | 35 | upload_state.ProcessData(method_argument, is_last_call); |
| 36 | if (is_last_call) { | ||
| 37 | } | ||
| 38 | break; | 36 | break; |
| 39 | } | 37 | } |
| 40 | } | 38 | } |
| @@ -42,8 +40,15 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call | |||
| 42 | 40 | ||
| 43 | void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | 41 | void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount, |
| 44 | u32 methods_pending) { | 42 | u32 methods_pending) { |
| 45 | for (std::size_t i = 0; i < amount; i++) { | 43 | switch (method) { |
| 46 | CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); | 44 | case KEPLERMEMORY_REG_INDEX(data): |
| 45 | upload_state.ProcessData(base_start, static_cast<size_t>(amount)); | ||
| 46 | return; | ||
| 47 | default: | ||
| 48 | for (std::size_t i = 0; i < amount; i++) { | ||
| 49 | CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); | ||
| 50 | } | ||
| 51 | break; | ||
| 47 | } | 52 | } |
| 48 | } | 53 | } |
| 49 | 54 | ||
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index add1ccebe..632052c53 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -239,8 +239,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume | |||
| 239 | return upload_state.ProcessExec(regs.exec_upload.linear != 0); | 239 | return upload_state.ProcessExec(regs.exec_upload.linear != 0); |
| 240 | case MAXWELL3D_REG_INDEX(data_upload): | 240 | case MAXWELL3D_REG_INDEX(data_upload): |
| 241 | upload_state.ProcessData(argument, is_last_call); | 241 | upload_state.ProcessData(argument, is_last_call); |
| 242 | if (is_last_call) { | ||
| 243 | } | ||
| 244 | return; | 242 | return; |
| 245 | case MAXWELL3D_REG_INDEX(fragment_barrier): | 243 | case MAXWELL3D_REG_INDEX(fragment_barrier): |
| 246 | return rasterizer->FragmentBarrier(); | 244 | return rasterizer->FragmentBarrier(); |
| @@ -316,6 +314,9 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | |||
| 316 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15: | 314 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15: |
| 317 | ProcessCBMultiData(base_start, amount); | 315 | ProcessCBMultiData(base_start, amount); |
| 318 | break; | 316 | break; |
| 317 | case MAXWELL3D_REG_INDEX(data_upload): | ||
| 318 | upload_state.ProcessData(base_start, static_cast<size_t>(amount)); | ||
| 319 | return; | ||
| 319 | default: | 320 | default: |
| 320 | for (std::size_t i = 0; i < amount; i++) { | 321 | for (std::size_t i = 0; i < amount; i++) { |
| 321 | CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); | 322 | CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 0efe58282..a12a95ce2 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 3 | 3 | ||
| 4 | #include "common/algorithm.h" | ||
| 4 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 5 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 6 | #include "common/microprofile.h" | 7 | #include "common/microprofile.h" |
| @@ -54,8 +55,6 @@ void MaxwellDMA::Launch() { | |||
| 54 | const LaunchDMA& launch = regs.launch_dma; | 55 | const LaunchDMA& launch = regs.launch_dma; |
| 55 | ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); | 56 | ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); |
| 56 | ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); | 57 | ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); |
| 57 | ASSERT(regs.dst_params.origin.x == 0); | ||
| 58 | ASSERT(regs.dst_params.origin.y == 0); | ||
| 59 | 58 | ||
| 60 | const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; | 59 | const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; |
| 61 | const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; | 60 | const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; |
| @@ -121,12 +120,13 @@ void MaxwellDMA::CopyPitchToPitch() { | |||
| 121 | 120 | ||
| 122 | void MaxwellDMA::CopyBlockLinearToPitch() { | 121 | void MaxwellDMA::CopyBlockLinearToPitch() { |
| 123 | UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); | 122 | UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); |
| 124 | UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0); | ||
| 125 | UNIMPLEMENTED_IF(regs.src_params.layer != 0); | 123 | UNIMPLEMENTED_IF(regs.src_params.layer != 0); |
| 126 | 124 | ||
| 125 | const bool is_remapping = regs.launch_dma.remap_enable != 0; | ||
| 126 | |||
| 127 | // Optimized path for micro copies. | 127 | // Optimized path for micro copies. |
| 128 | const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; | 128 | const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; |
| 129 | if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X && | 129 | if (!is_remapping && dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X && |
| 130 | regs.src_params.height > GOB_SIZE_Y) { | 130 | regs.src_params.height > GOB_SIZE_Y) { |
| 131 | FastCopyBlockLinearToPitch(); | 131 | FastCopyBlockLinearToPitch(); |
| 132 | return; | 132 | return; |
| @@ -134,10 +134,27 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 134 | 134 | ||
| 135 | // Deswizzle the input and copy it over. | 135 | // Deswizzle the input and copy it over. |
| 136 | UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); | 136 | UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); |
| 137 | const u32 bytes_per_pixel = | ||
| 138 | regs.launch_dma.remap_enable ? regs.pitch_out / regs.line_length_in : 1; | ||
| 139 | const Parameters& src_params = regs.src_params; | 137 | const Parameters& src_params = regs.src_params; |
| 140 | const u32 width = src_params.width; | 138 | |
| 139 | const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; | ||
| 140 | const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; | ||
| 141 | |||
| 142 | const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size; | ||
| 143 | |||
| 144 | u32 width = src_params.width; | ||
| 145 | u32 x_elements = regs.line_length_in; | ||
| 146 | u32 x_offset = src_params.origin.x; | ||
| 147 | u32 bpp_shift = 0U; | ||
| 148 | if (!is_remapping) { | ||
| 149 | bpp_shift = Common::FoldRight( | ||
| 150 | 4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); }, | ||
| 151 | width, x_elements, x_offset, static_cast<u32>(regs.offset_in)); | ||
| 152 | width >>= bpp_shift; | ||
| 153 | x_elements >>= bpp_shift; | ||
| 154 | x_offset >>= bpp_shift; | ||
| 155 | } | ||
| 156 | |||
| 157 | const u32 bytes_per_pixel = base_bpp << bpp_shift; | ||
| 141 | const u32 height = src_params.height; | 158 | const u32 height = src_params.height; |
| 142 | const u32 depth = src_params.depth; | 159 | const u32 depth = src_params.depth; |
| 143 | const u32 block_height = src_params.block_size.height; | 160 | const u32 block_height = src_params.block_size.height; |
| @@ -155,30 +172,46 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 155 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); | 172 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); |
| 156 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); | 173 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); |
| 157 | 174 | ||
| 158 | UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, width, bytes_per_pixel, | 175 | UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, |
| 159 | block_height, src_params.origin.x, src_params.origin.y, write_buffer.data(), | 176 | src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, |
| 160 | read_buffer.data()); | 177 | regs.pitch_out); |
| 161 | 178 | ||
| 162 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); | 179 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); |
| 163 | } | 180 | } |
| 164 | 181 | ||
| 165 | void MaxwellDMA::CopyPitchToBlockLinear() { | 182 | void MaxwellDMA::CopyPitchToBlockLinear() { |
| 166 | UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one"); | 183 | UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one"); |
| 184 | UNIMPLEMENTED_IF(regs.dst_params.layer != 0); | ||
| 167 | UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); | 185 | UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); |
| 168 | 186 | ||
| 187 | const bool is_remapping = regs.launch_dma.remap_enable != 0; | ||
| 188 | const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; | ||
| 189 | const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; | ||
| 190 | |||
| 169 | const auto& dst_params = regs.dst_params; | 191 | const auto& dst_params = regs.dst_params; |
| 170 | const u32 bytes_per_pixel = | 192 | |
| 171 | regs.launch_dma.remap_enable ? regs.pitch_in / regs.line_length_in : 1; | 193 | const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size; |
| 172 | const u32 width = dst_params.width; | 194 | |
| 195 | u32 width = dst_params.width; | ||
| 196 | u32 x_elements = regs.line_length_in; | ||
| 197 | u32 x_offset = dst_params.origin.x; | ||
| 198 | u32 bpp_shift = 0U; | ||
| 199 | if (!is_remapping) { | ||
| 200 | bpp_shift = Common::FoldRight( | ||
| 201 | 4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); }, | ||
| 202 | width, x_elements, x_offset, static_cast<u32>(regs.offset_out)); | ||
| 203 | width >>= bpp_shift; | ||
| 204 | x_elements >>= bpp_shift; | ||
| 205 | x_offset >>= bpp_shift; | ||
| 206 | } | ||
| 207 | |||
| 208 | const u32 bytes_per_pixel = base_bpp << bpp_shift; | ||
| 173 | const u32 height = dst_params.height; | 209 | const u32 height = dst_params.height; |
| 174 | const u32 depth = dst_params.depth; | 210 | const u32 depth = dst_params.depth; |
| 175 | const u32 block_height = dst_params.block_size.height; | 211 | const u32 block_height = dst_params.block_size.height; |
| 176 | const u32 block_depth = dst_params.block_size.depth; | 212 | const u32 block_depth = dst_params.block_size.depth; |
| 177 | const size_t dst_size = | 213 | const size_t dst_size = |
| 178 | CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); | 214 | CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); |
| 179 | const size_t dst_layer_size = | ||
| 180 | CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth); | ||
| 181 | |||
| 182 | const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count; | 215 | const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count; |
| 183 | 216 | ||
| 184 | if (read_buffer.size() < src_size) { | 217 | if (read_buffer.size() < src_size) { |
| @@ -188,32 +221,23 @@ void MaxwellDMA::CopyPitchToBlockLinear() { | |||
| 188 | write_buffer.resize(dst_size); | 221 | write_buffer.resize(dst_size); |
| 189 | } | 222 | } |
| 190 | 223 | ||
| 224 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); | ||
| 191 | if (Settings::IsGPULevelExtreme()) { | 225 | if (Settings::IsGPULevelExtreme()) { |
| 192 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); | ||
| 193 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); | 226 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); |
| 194 | } else { | 227 | } else { |
| 195 | memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size); | ||
| 196 | memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); | 228 | memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); |
| 197 | } | 229 | } |
| 198 | 230 | ||
| 199 | // If the input is linear and the output is tiled, swizzle the input and copy it over. | 231 | // If the input is linear and the output is tiled, swizzle the input and copy it over. |
| 200 | if (regs.dst_params.block_size.depth > 0) { | 232 | SwizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, |
| 201 | ASSERT(dst_params.layer == 0); | 233 | dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth, |
| 202 | SwizzleSliceToVoxel(regs.line_length_in, regs.line_count, regs.pitch_in, width, height, | 234 | regs.pitch_in); |
| 203 | bytes_per_pixel, block_height, block_depth, dst_params.origin.x, | ||
| 204 | dst_params.origin.y, write_buffer.data(), read_buffer.data()); | ||
| 205 | } else { | ||
| 206 | SwizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_in, width, bytes_per_pixel, | ||
| 207 | write_buffer.data() + dst_layer_size * dst_params.layer, read_buffer.data(), | ||
| 208 | block_height, dst_params.origin.x, dst_params.origin.y); | ||
| 209 | } | ||
| 210 | 235 | ||
| 211 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); | 236 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); |
| 212 | } | 237 | } |
| 213 | 238 | ||
| 214 | void MaxwellDMA::FastCopyBlockLinearToPitch() { | 239 | void MaxwellDMA::FastCopyBlockLinearToPitch() { |
| 215 | const u32 bytes_per_pixel = | 240 | const u32 bytes_per_pixel = 1U; |
| 216 | regs.launch_dma.remap_enable ? regs.pitch_out / regs.line_length_in : 1; | ||
| 217 | const size_t src_size = GOB_SIZE; | 241 | const size_t src_size = GOB_SIZE; |
| 218 | const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; | 242 | const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; |
| 219 | u32 pos_x = regs.src_params.origin.x; | 243 | u32 pos_x = regs.src_params.origin.x; |
| @@ -239,9 +263,10 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() { | |||
| 239 | memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); | 263 | memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); |
| 240 | } | 264 | } |
| 241 | 265 | ||
| 242 | UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, regs.src_params.width, | 266 | UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, regs.src_params.width, |
| 243 | bytes_per_pixel, regs.src_params.block_size.height, pos_x, pos_y, | 267 | regs.src_params.height, 1, pos_x, pos_y, regs.line_length_in, regs.line_count, |
| 244 | write_buffer.data(), read_buffer.data()); | 268 | regs.src_params.block_size.height, regs.src_params.block_size.depth, |
| 269 | regs.pitch_out); | ||
| 245 | 270 | ||
| 246 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); | 271 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); |
| 247 | } | 272 | } |
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 074bac92c..9c5d567a6 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h | |||
| @@ -189,10 +189,16 @@ public: | |||
| 189 | BitField<4, 3, Swizzle> dst_y; | 189 | BitField<4, 3, Swizzle> dst_y; |
| 190 | BitField<8, 3, Swizzle> dst_z; | 190 | BitField<8, 3, Swizzle> dst_z; |
| 191 | BitField<12, 3, Swizzle> dst_w; | 191 | BitField<12, 3, Swizzle> dst_w; |
| 192 | BitField<0, 12, u32> dst_components_raw; | ||
| 192 | BitField<16, 2, u32> component_size_minus_one; | 193 | BitField<16, 2, u32> component_size_minus_one; |
| 193 | BitField<20, 2, u32> num_src_components_minus_one; | 194 | BitField<20, 2, u32> num_src_components_minus_one; |
| 194 | BitField<24, 2, u32> num_dst_components_minus_one; | 195 | BitField<24, 2, u32> num_dst_components_minus_one; |
| 195 | }; | 196 | }; |
| 197 | |||
| 198 | Swizzle GetComponent(size_t i) { | ||
| 199 | const u32 raw = dst_components_raw; | ||
| 200 | return static_cast<Swizzle>((raw >> (i * 3)) & 0x7); | ||
| 201 | } | ||
| 196 | }; | 202 | }; |
| 197 | static_assert(sizeof(RemapConst) == 12); | 203 | static_assert(sizeof(RemapConst) == 12); |
| 198 | 204 | ||
diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp index 5d8039841..b9ac41529 100644 --- a/src/video_core/host1x/vic.cpp +++ b/src/video_core/host1x/vic.cpp | |||
| @@ -156,8 +156,9 @@ void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) { | |||
| 156 | const u32 block_height = static_cast<u32>(config.block_linear_height_log2); | 156 | const u32 block_height = static_cast<u32>(config.block_linear_height_log2); |
| 157 | const auto size = Texture::CalculateSize(true, 4, width, height, 1, block_height, 0); | 157 | const auto size = Texture::CalculateSize(true, 4, width, height, 1, block_height, 0); |
| 158 | luma_buffer.resize(size); | 158 | luma_buffer.resize(size); |
| 159 | Texture::SwizzleSubrect(width, height, width * 4, width, 4, luma_buffer.data(), | 159 | std::span<const u8> frame_buff(converted_frame_buf_addr, 4 * width * height); |
| 160 | converted_frame_buf_addr, block_height, 0, 0); | 160 | Texture::SwizzleSubrect(luma_buffer, frame_buff, 4, width, height, 1, |
| 161 | 0, 0, width, height, block_height, 0, width * 4); | ||
| 161 | 162 | ||
| 162 | host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size); | 163 | host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size); |
| 163 | } else { | 164 | } else { |
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 4e52ce0fd..4a692448e 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -462,6 +462,97 @@ void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const { | |||
| 462 | MemoryOperation<true>(gpu_addr, size, mapped_big, do_nothing, flush_short_pages); | 462 | MemoryOperation<true>(gpu_addr, size, mapped_big, do_nothing, flush_short_pages); |
| 463 | } | 463 | } |
| 464 | 464 | ||
| 465 | bool MemoryManager::IsMemoryDirty(GPUVAddr gpu_addr, size_t size) const { | ||
| 466 | bool result = false; | ||
| 467 | auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, | ||
| 468 | [[maybe_unused]] std::size_t offset, | ||
| 469 | [[maybe_unused]] std::size_t copy_amount) { return false; }; | ||
| 470 | |||
| 471 | auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||
| 472 | const VAddr cpu_addr_base = | ||
| 473 | (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||
| 474 | result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount); | ||
| 475 | return result; | ||
| 476 | }; | ||
| 477 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||
| 478 | const VAddr cpu_addr_base = | ||
| 479 | (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | ||
| 480 | result |= rasterizer->MustFlushRegion(cpu_addr_base, copy_amount); | ||
| 481 | return result; | ||
| 482 | }; | ||
| 483 | auto check_short_pages = [&](std::size_t page_index, std::size_t offset, | ||
| 484 | std::size_t copy_amount) { | ||
| 485 | GPUVAddr base = (page_index << big_page_bits) + offset; | ||
| 486 | MemoryOperation<false>(base, copy_amount, mapped_normal, do_nothing, do_nothing); | ||
| 487 | return result; | ||
| 488 | }; | ||
| 489 | MemoryOperation<true>(gpu_addr, size, mapped_big, do_nothing, check_short_pages); | ||
| 490 | return result; | ||
| 491 | } | ||
| 492 | |||
| 493 | size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const { | ||
| 494 | std::optional<VAddr> old_page_addr{}; | ||
| 495 | size_t range_so_far = 0; | ||
| 496 | bool result{false}; | ||
| 497 | auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, | ||
| 498 | std::size_t copy_amount) { | ||
| 499 | result = true; | ||
| 500 | return true; | ||
| 501 | }; | ||
| 502 | auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||
| 503 | const VAddr cpu_addr_base = | ||
| 504 | (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||
| 505 | if (old_page_addr && *old_page_addr != cpu_addr_base) { | ||
| 506 | result = true; | ||
| 507 | return true; | ||
| 508 | } | ||
| 509 | range_so_far += copy_amount; | ||
| 510 | old_page_addr = {cpu_addr_base + copy_amount}; | ||
| 511 | return false; | ||
| 512 | }; | ||
| 513 | auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||
| 514 | const VAddr cpu_addr_base = | ||
| 515 | (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | ||
| 516 | if (old_page_addr && *old_page_addr != cpu_addr_base) { | ||
| 517 | return true; | ||
| 518 | } | ||
| 519 | range_so_far += copy_amount; | ||
| 520 | old_page_addr = {cpu_addr_base + copy_amount}; | ||
| 521 | return false; | ||
| 522 | }; | ||
| 523 | auto check_short_pages = [&](std::size_t page_index, std::size_t offset, | ||
| 524 | std::size_t copy_amount) { | ||
| 525 | GPUVAddr base = (page_index << big_page_bits) + offset; | ||
| 526 | MemoryOperation<false>(base, copy_amount, short_check, fail, fail); | ||
| 527 | return result; | ||
| 528 | }; | ||
| 529 | MemoryOperation<true>(gpu_addr, size, big_check, fail, check_short_pages); | ||
| 530 | return range_so_far; | ||
| 531 | } | ||
| 532 | |||
| 533 | void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const { | ||
| 534 | auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, | ||
| 535 | [[maybe_unused]] std::size_t offset, | ||
| 536 | [[maybe_unused]] std::size_t copy_amount) {}; | ||
| 537 | |||
| 538 | auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||
| 539 | const VAddr cpu_addr_base = | ||
| 540 | (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset; | ||
| 541 | rasterizer->InvalidateRegion(cpu_addr_base, copy_amount); | ||
| 542 | }; | ||
| 543 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||
| 544 | const VAddr cpu_addr_base = | ||
| 545 | (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset; | ||
| 546 | rasterizer->InvalidateRegion(cpu_addr_base, copy_amount); | ||
| 547 | }; | ||
| 548 | auto invalidate_short_pages = [&](std::size_t page_index, std::size_t offset, | ||
| 549 | std::size_t copy_amount) { | ||
| 550 | GPUVAddr base = (page_index << big_page_bits) + offset; | ||
| 551 | MemoryOperation<false>(base, copy_amount, mapped_normal, do_nothing, do_nothing); | ||
| 552 | }; | ||
| 553 | MemoryOperation<true>(gpu_addr, size, mapped_big, do_nothing, invalidate_short_pages); | ||
| 554 | } | ||
| 555 | |||
| 465 | void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size) { | 556 | void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size) { |
| 466 | std::vector<u8> tmp_buffer(size); | 557 | std::vector<u8> tmp_buffer(size); |
| 467 | ReadBlock(gpu_src_addr, tmp_buffer.data(), size); | 558 | ReadBlock(gpu_src_addr, tmp_buffer.data(), size); |
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 8f8877a92..9c08edc20 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -104,6 +104,12 @@ public: | |||
| 104 | 104 | ||
| 105 | void FlushRegion(GPUVAddr gpu_addr, size_t size) const; | 105 | void FlushRegion(GPUVAddr gpu_addr, size_t size) const; |
| 106 | 106 | ||
| 107 | void InvalidateRegion(GPUVAddr gpu_addr, size_t size) const; | ||
| 108 | |||
| 109 | bool IsMemoryDirty(GPUVAddr gpu_addr, size_t size) const; | ||
| 110 | |||
| 111 | size_t MaxContinousRange(GPUVAddr gpu_addr, size_t size) const; | ||
| 112 | |||
| 107 | private: | 113 | private: |
| 108 | template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> | 114 | template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> |
| 109 | inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, | 115 | inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index cb07f3d38..d2d40884c 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -129,7 +129,7 @@ public: | |||
| 129 | [[nodiscard]] virtual Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() = 0; | 129 | [[nodiscard]] virtual Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() = 0; |
| 130 | 130 | ||
| 131 | virtual void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | 131 | virtual void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, |
| 132 | std::span<u8> memory) = 0; | 132 | std::span<const u8> memory) = 0; |
| 133 | 133 | ||
| 134 | /// Attempt to use a faster method to display the framebuffer to screen | 134 | /// Attempt to use a faster method to display the framebuffer to screen |
| 135 | [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, | 135 | [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 02bb17715..c2d80605d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -476,7 +476,7 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA() | |||
| 476 | } | 476 | } |
| 477 | 477 | ||
| 478 | void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | 478 | void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, |
| 479 | std::span<u8> memory) { | 479 | std::span<const u8> memory) { |
| 480 | auto cpu_addr = gpu_memory->GpuToCpuAddress(address); | 480 | auto cpu_addr = gpu_memory->GpuToCpuAddress(address); |
| 481 | if (!cpu_addr) [[unlikely]] { | 481 | if (!cpu_addr) [[unlikely]] { |
| 482 | gpu_memory->WriteBlock(address, memory.data(), copy_size); | 482 | gpu_memory->WriteBlock(address, memory.data(), copy_size); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index fe0ba979a..45131b785 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -99,7 +99,7 @@ public: | |||
| 99 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | 99 | const Tegra::Engines::Fermi2D::Config& copy_config) override; |
| 100 | Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; | 100 | Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; |
| 101 | void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | 101 | void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, |
| 102 | std::span<u8> memory) override; | 102 | std::span<const u8> memory) override; |
| 103 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 103 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |
| 104 | u32 pixel_stride) override; | 104 | u32 pixel_stride) override; |
| 105 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 105 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index f17a5ccd6..241d7573e 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -26,8 +26,6 @@ | |||
| 26 | 26 | ||
| 27 | namespace Vulkan { | 27 | namespace Vulkan { |
| 28 | 28 | ||
| 29 | using Tegra::Texture::SWIZZLE_TABLE; | ||
| 30 | |||
| 31 | namespace { | 29 | namespace { |
| 32 | 30 | ||
| 33 | constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0; | 31 | constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index a35e41199..acfd5da7d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -548,7 +548,7 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() | |||
| 548 | } | 548 | } |
| 549 | 549 | ||
| 550 | void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | 550 | void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, |
| 551 | std::span<u8> memory) { | 551 | std::span<const u8> memory) { |
| 552 | auto cpu_addr = gpu_memory->GpuToCpuAddress(address); | 552 | auto cpu_addr = gpu_memory->GpuToCpuAddress(address); |
| 553 | if (!cpu_addr) [[unlikely]] { | 553 | if (!cpu_addr) [[unlikely]] { |
| 554 | gpu_memory->WriteBlock(address, memory.data(), copy_size); | 554 | gpu_memory->WriteBlock(address, memory.data(), copy_size); |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index fb9e83e8f..4cde3c983 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -95,7 +95,7 @@ public: | |||
| 95 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | 95 | const Tegra::Engines::Fermi2D::Config& copy_config) override; |
| 96 | Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; | 96 | Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; |
| 97 | void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, | 97 | void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, |
| 98 | std::span<u8> memory) override; | 98 | std::span<const u8> memory) override; |
| 99 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 99 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |
| 100 | u32 pixel_stride) override; | 100 | u32 pixel_stride) override; |
| 101 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | 101 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index bea1c27d0..1223df5a0 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp | |||
| @@ -517,7 +517,6 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr | |||
| 517 | const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block; | 517 | const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block; |
| 518 | 518 | ||
| 519 | UNIMPLEMENTED_IF(info.tile_width_spacing > 0); | 519 | UNIMPLEMENTED_IF(info.tile_width_spacing > 0); |
| 520 | |||
| 521 | UNIMPLEMENTED_IF(copy.image_offset.x != 0); | 520 | UNIMPLEMENTED_IF(copy.image_offset.x != 0); |
| 522 | UNIMPLEMENTED_IF(copy.image_offset.y != 0); | 521 | UNIMPLEMENTED_IF(copy.image_offset.y != 0); |
| 523 | UNIMPLEMENTED_IF(copy.image_offset.z != 0); | 522 | UNIMPLEMENTED_IF(copy.image_offset.z != 0); |
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 913f8ebcb..fcc636e0b 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -89,6 +89,69 @@ void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 | |||
| 89 | } | 89 | } |
| 90 | } | 90 | } |
| 91 | 91 | ||
| 92 | template <bool TO_LINEAR, u32 BYTES_PER_PIXEL> | ||
| 93 | void SwizzleSubrectImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height, | ||
| 94 | u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 num_lines, | ||
| 95 | u32 block_height, u32 block_depth, u32 pitch_linear) { | ||
| 96 | // The origin of the transformation can be configured here, leave it as zero as the current API | ||
| 97 | // doesn't expose it. | ||
| 98 | static constexpr u32 origin_z = 0; | ||
| 99 | |||
| 100 | // We can configure here a custom pitch | ||
| 101 | // As it's not exposed 'width * BYTES_PER_PIXEL' will be the expected pitch. | ||
| 102 | const u32 pitch = pitch_linear; | ||
| 103 | const u32 stride = Common::AlignUpLog2(width * BYTES_PER_PIXEL, GOB_SIZE_X_SHIFT); | ||
| 104 | |||
| 105 | const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); | ||
| 106 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); | ||
| 107 | const u32 slice_size = | ||
| 108 | Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size; | ||
| 109 | |||
| 110 | const u32 block_height_mask = (1U << block_height) - 1; | ||
| 111 | const u32 block_depth_mask = (1U << block_depth) - 1; | ||
| 112 | const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth; | ||
| 113 | |||
| 114 | u32 unprocessed_lines = num_lines; | ||
| 115 | u32 extent_y = std::min(num_lines, height - origin_y); | ||
| 116 | |||
| 117 | for (u32 slice = 0; slice < depth; ++slice) { | ||
| 118 | const u32 z = slice + origin_z; | ||
| 119 | const u32 offset_z = (z >> block_depth) * slice_size + | ||
| 120 | ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height)); | ||
| 121 | const u32 lines_in_y = std::min(unprocessed_lines, extent_y); | ||
| 122 | for (u32 line = 0; line < lines_in_y; ++line) { | ||
| 123 | const u32 y = line + origin_y; | ||
| 124 | const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(y); | ||
| 125 | |||
| 126 | const u32 block_y = y >> GOB_SIZE_Y_SHIFT; | ||
| 127 | const u32 offset_y = (block_y >> block_height) * block_size + | ||
| 128 | ((block_y & block_height_mask) << GOB_SIZE_SHIFT); | ||
| 129 | |||
| 130 | u32 swizzled_x = pdep<SWIZZLE_X_BITS>(origin_x * BYTES_PER_PIXEL); | ||
| 131 | for (u32 column = 0; column < extent_x; | ||
| 132 | ++column, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) { | ||
| 133 | const u32 x = (column + origin_x) * BYTES_PER_PIXEL; | ||
| 134 | const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift; | ||
| 135 | |||
| 136 | const u32 base_swizzled_offset = offset_z + offset_y + offset_x; | ||
| 137 | const u32 swizzled_offset = base_swizzled_offset + (swizzled_x | swizzled_y); | ||
| 138 | |||
| 139 | const u32 unswizzled_offset = | ||
| 140 | slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL; | ||
| 141 | |||
| 142 | u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset]; | ||
| 143 | const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset]; | ||
| 144 | |||
| 145 | std::memcpy(dst, src, BYTES_PER_PIXEL); | ||
| 146 | } | ||
| 147 | } | ||
| 148 | unprocessed_lines -= lines_in_y; | ||
| 149 | if (unprocessed_lines == 0) { | ||
| 150 | return; | ||
| 151 | } | ||
| 152 | } | ||
| 153 | } | ||
| 154 | |||
| 92 | template <bool TO_LINEAR> | 155 | template <bool TO_LINEAR> |
| 93 | void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, | 156 | void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, |
| 94 | u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { | 157 | u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { |
| @@ -111,97 +174,6 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe | |||
| 111 | } | 174 | } |
| 112 | } | 175 | } |
| 113 | 176 | ||
| 114 | template <u32 BYTES_PER_PIXEL> | ||
| 115 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | ||
| 116 | u8* swizzled_data, const u8* unswizzled_data, u32 block_height_bit, | ||
| 117 | u32 offset_x, u32 offset_y) { | ||
| 118 | const u32 block_height = 1U << block_height_bit; | ||
| 119 | const u32 image_width_in_gobs = | ||
| 120 | (swizzled_width * BYTES_PER_PIXEL + (GOB_SIZE_X - 1)) / GOB_SIZE_X; | ||
| 121 | for (u32 line = 0; line < subrect_height; ++line) { | ||
| 122 | const u32 dst_y = line + offset_y; | ||
| 123 | const u32 gob_address_y = | ||
| 124 | (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + | ||
| 125 | ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; | ||
| 126 | |||
| 127 | const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(dst_y); | ||
| 128 | u32 swizzled_x = pdep<SWIZZLE_X_BITS>(offset_x * BYTES_PER_PIXEL); | ||
| 129 | for (u32 x = 0; x < subrect_width; | ||
| 130 | ++x, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) { | ||
| 131 | const u32 dst_x = x + offset_x; | ||
| 132 | const u32 gob_address = | ||
| 133 | gob_address_y + (dst_x * BYTES_PER_PIXEL / GOB_SIZE_X) * GOB_SIZE * block_height; | ||
| 134 | const u32 swizzled_offset = gob_address + (swizzled_x | swizzled_y); | ||
| 135 | const u32 unswizzled_offset = line * source_pitch + x * BYTES_PER_PIXEL; | ||
| 136 | |||
| 137 | const u8* const source_line = unswizzled_data + unswizzled_offset; | ||
| 138 | u8* const dest_addr = swizzled_data + swizzled_offset; | ||
| 139 | std::memcpy(dest_addr, source_line, BYTES_PER_PIXEL); | ||
| 140 | } | ||
| 141 | } | ||
| 142 | } | ||
| 143 | |||
| 144 | template <u32 BYTES_PER_PIXEL> | ||
| 145 | void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 block_height, | ||
| 146 | u32 origin_x, u32 origin_y, u8* output, const u8* input) { | ||
| 147 | const u32 stride = width * BYTES_PER_PIXEL; | ||
| 148 | const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X; | ||
| 149 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height); | ||
| 150 | |||
| 151 | const u32 block_height_mask = (1U << block_height) - 1; | ||
| 152 | const u32 x_shift = GOB_SIZE_SHIFT + block_height; | ||
| 153 | |||
| 154 | for (u32 line = 0; line < line_count; ++line) { | ||
| 155 | const u32 src_y = line + origin_y; | ||
| 156 | const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(src_y); | ||
| 157 | |||
| 158 | const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT; | ||
| 159 | const u32 src_offset_y = (block_y >> block_height) * block_size + | ||
| 160 | ((block_y & block_height_mask) << GOB_SIZE_SHIFT); | ||
| 161 | |||
| 162 | u32 swizzled_x = pdep<SWIZZLE_X_BITS>(origin_x * BYTES_PER_PIXEL); | ||
| 163 | for (u32 column = 0; column < line_length_in; | ||
| 164 | ++column, incrpdep<SWIZZLE_X_BITS, BYTES_PER_PIXEL>(swizzled_x)) { | ||
| 165 | const u32 src_x = (column + origin_x) * BYTES_PER_PIXEL; | ||
| 166 | const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift; | ||
| 167 | |||
| 168 | const u32 swizzled_offset = src_offset_y + src_offset_x + (swizzled_x | swizzled_y); | ||
| 169 | const u32 unswizzled_offset = line * pitch + column * BYTES_PER_PIXEL; | ||
| 170 | |||
| 171 | std::memcpy(output + unswizzled_offset, input + swizzled_offset, BYTES_PER_PIXEL); | ||
| 172 | } | ||
| 173 | } | ||
| 174 | } | ||
| 175 | |||
| 176 | template <u32 BYTES_PER_PIXEL> | ||
| 177 | void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height, | ||
| 178 | u32 block_height, u32 block_depth, u32 origin_x, u32 origin_y, u8* output, | ||
| 179 | const u8* input) { | ||
| 180 | UNIMPLEMENTED_IF(origin_x > 0); | ||
| 181 | UNIMPLEMENTED_IF(origin_y > 0); | ||
| 182 | |||
| 183 | const u32 stride = width * BYTES_PER_PIXEL; | ||
| 184 | const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X; | ||
| 185 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); | ||
| 186 | |||
| 187 | const u32 block_height_mask = (1U << block_height) - 1; | ||
| 188 | const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth; | ||
| 189 | |||
| 190 | for (u32 line = 0; line < line_count; ++line) { | ||
| 191 | const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(line); | ||
| 192 | const u32 block_y = line / GOB_SIZE_Y; | ||
| 193 | const u32 dst_offset_y = | ||
| 194 | (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE; | ||
| 195 | |||
| 196 | u32 swizzled_x = 0; | ||
| 197 | for (u32 x = 0; x < line_length_in; ++x, incrpdep<SWIZZLE_X_BITS, 1>(swizzled_x)) { | ||
| 198 | const u32 dst_offset = | ||
| 199 | ((x / GOB_SIZE_X) << x_shift) + dst_offset_y + (swizzled_x | swizzled_y); | ||
| 200 | const u32 src_offset = x * BYTES_PER_PIXEL + line * pitch; | ||
| 201 | std::memcpy(output + dst_offset, input + src_offset, BYTES_PER_PIXEL); | ||
| 202 | } | ||
| 203 | } | ||
| 204 | } | ||
| 205 | } // Anonymous namespace | 177 | } // Anonymous namespace |
| 206 | 178 | ||
| 207 | void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, | 179 | void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, |
| @@ -218,15 +190,15 @@ void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_p | |||
| 218 | stride_alignment); | 190 | stride_alignment); |
| 219 | } | 191 | } |
| 220 | 192 | ||
| 221 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 193 | void SwizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, |
| 222 | u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data, | 194 | u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 extent_y, |
| 223 | u32 block_height_bit, u32 offset_x, u32 offset_y) { | 195 | u32 block_height, u32 block_depth, u32 pitch_linear) { |
| 224 | switch (bytes_per_pixel) { | 196 | switch (bytes_per_pixel) { |
| 225 | #define BPP_CASE(x) \ | 197 | #define BPP_CASE(x) \ |
| 226 | case x: \ | 198 | case x: \ |
| 227 | return SwizzleSubrect<x>(subrect_width, subrect_height, source_pitch, swizzled_width, \ | 199 | return SwizzleSubrectImpl<true, x>(output, input, width, height, depth, origin_x, \ |
| 228 | swizzled_data, unswizzled_data, block_height_bit, offset_x, \ | 200 | origin_y, extent_x, extent_y, block_height, \ |
| 229 | offset_y); | 201 | block_depth, pitch_linear); |
| 230 | BPP_CASE(1) | 202 | BPP_CASE(1) |
| 231 | BPP_CASE(2) | 203 | BPP_CASE(2) |
| 232 | BPP_CASE(3) | 204 | BPP_CASE(3) |
| @@ -241,13 +213,15 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 | |||
| 241 | } | 213 | } |
| 242 | } | 214 | } |
| 243 | 215 | ||
| 244 | void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel, | 216 | void UnswizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, |
| 245 | u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) { | 217 | u32 width, u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, |
| 218 | u32 extent_y, u32 block_height, u32 block_depth, u32 pitch_linear) { | ||
| 246 | switch (bytes_per_pixel) { | 219 | switch (bytes_per_pixel) { |
| 247 | #define BPP_CASE(x) \ | 220 | #define BPP_CASE(x) \ |
| 248 | case x: \ | 221 | case x: \ |
| 249 | return UnswizzleSubrect<x>(line_length_in, line_count, pitch, width, block_height, \ | 222 | return SwizzleSubrectImpl<false, x>(output, input, width, height, depth, origin_x, \ |
| 250 | origin_x, origin_y, output, input); | 223 | origin_y, extent_x, extent_y, block_height, \ |
| 224 | block_depth, pitch_linear); | ||
| 251 | BPP_CASE(1) | 225 | BPP_CASE(1) |
| 252 | BPP_CASE(2) | 226 | BPP_CASE(2) |
| 253 | BPP_CASE(3) | 227 | BPP_CASE(3) |
| @@ -262,55 +236,6 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, | |||
| 262 | } | 236 | } |
| 263 | } | 237 | } |
| 264 | 238 | ||
| 265 | void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height, | ||
| 266 | u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x, | ||
| 267 | u32 origin_y, u8* output, const u8* input) { | ||
| 268 | switch (bytes_per_pixel) { | ||
| 269 | #define BPP_CASE(x) \ | ||
| 270 | case x: \ | ||
| 271 | return SwizzleSliceToVoxel<x>(line_length_in, line_count, pitch, width, height, \ | ||
| 272 | block_height, block_depth, origin_x, origin_y, output, \ | ||
| 273 | input); | ||
| 274 | BPP_CASE(1) | ||
| 275 | BPP_CASE(2) | ||
| 276 | BPP_CASE(3) | ||
| 277 | BPP_CASE(4) | ||
| 278 | BPP_CASE(6) | ||
| 279 | BPP_CASE(8) | ||
| 280 | BPP_CASE(12) | ||
| 281 | BPP_CASE(16) | ||
| 282 | #undef BPP_CASE | ||
| 283 | default: | ||
| 284 | ASSERT_MSG(false, "Invalid bytes_per_pixel={}", bytes_per_pixel); | ||
| 285 | } | ||
| 286 | } | ||
| 287 | |||
| 288 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, | ||
| 289 | const u32 block_height_bit, const std::size_t copy_size, const u8* source_data, | ||
| 290 | u8* swizzle_data) { | ||
| 291 | const u32 block_height = 1U << block_height_bit; | ||
| 292 | const u32 image_width_in_gobs{(width + GOB_SIZE_X - 1) / GOB_SIZE_X}; | ||
| 293 | std::size_t count = 0; | ||
| 294 | for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { | ||
| 295 | const std::size_t gob_address_y = | ||
| 296 | (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + | ||
| 297 | ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; | ||
| 298 | const u32 swizzled_y = pdep<SWIZZLE_Y_BITS>(static_cast<u32>(y)); | ||
| 299 | u32 swizzled_x = pdep<SWIZZLE_X_BITS>(dst_x); | ||
| 300 | for (std::size_t x = dst_x; x < width && count < copy_size; | ||
| 301 | ++x, incrpdep<SWIZZLE_X_BITS, 1>(swizzled_x)) { | ||
| 302 | const std::size_t gob_address = | ||
| 303 | gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height; | ||
| 304 | const std::size_t swizzled_offset = gob_address + (swizzled_x | swizzled_y); | ||
| 305 | const u8* source_line = source_data + count; | ||
| 306 | u8* dest_addr = swizzle_data + swizzled_offset; | ||
| 307 | count++; | ||
| 308 | |||
| 309 | *dest_addr = *source_line; | ||
| 310 | } | ||
| 311 | } | ||
| 312 | } | ||
| 313 | |||
| 314 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 239 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
| 315 | u32 block_height, u32 block_depth) { | 240 | u32 block_height, u32 block_depth) { |
| 316 | if (tiled) { | 241 | if (tiled) { |
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 31a11708f..e70407692 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -40,7 +40,6 @@ constexpr SwizzleTable MakeSwizzleTable() { | |||
| 40 | } | 40 | } |
| 41 | return table; | 41 | return table; |
| 42 | } | 42 | } |
| 43 | constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTable(); | ||
| 44 | 43 | ||
| 45 | /// Unswizzles a block linear texture into linear memory. | 44 | /// Unswizzles a block linear texture into linear memory. |
| 46 | void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, | 45 | void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, |
| @@ -57,34 +56,14 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height | |||
| 57 | u32 block_height, u32 block_depth); | 56 | u32 block_height, u32 block_depth); |
| 58 | 57 | ||
| 59 | /// Copies an untiled subrectangle into a tiled surface. | 58 | /// Copies an untiled subrectangle into a tiled surface. |
| 60 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 59 | void SwizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, |
| 61 | u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data, | 60 | u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, u32 extent_y, |
| 62 | u32 block_height_bit, u32 offset_x, u32 offset_y); | 61 | u32 block_height, u32 block_depth, u32 pitch_linear); |
| 63 | 62 | ||
| 64 | /// Copies a tiled subrectangle into a linear surface. | 63 | /// Copies a tiled subrectangle into a linear surface. |
| 65 | void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel, | 64 | void UnswizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, |
| 66 | u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input); | 65 | u32 width, u32 height, u32 depth, u32 origin_x, u32 origin_y, u32 extent_x, |
| 67 | 66 | u32 extent_y, u32 block_height, u32 block_depth, u32 pitch_linear); | |
| 68 | /// @brief Swizzles a 2D array of pixels into a 3D texture | ||
| 69 | /// @param line_length_in Number of pixels per line | ||
| 70 | /// @param line_count Number of lines | ||
| 71 | /// @param pitch Number of bytes per line | ||
| 72 | /// @param width Width of the swizzled texture | ||
| 73 | /// @param height Height of the swizzled texture | ||
| 74 | /// @param bytes_per_pixel Number of bytes used per pixel | ||
| 75 | /// @param block_height Block height shift | ||
| 76 | /// @param block_depth Block depth shift | ||
| 77 | /// @param origin_x Column offset in pixels of the swizzled texture | ||
| 78 | /// @param origin_y Row offset in pixels of the swizzled texture | ||
| 79 | /// @param output Pointer to the pixels of the swizzled texture | ||
| 80 | /// @param input Pointer to the 2D array of pixels used as input | ||
| 81 | /// @pre input and output points to an array large enough to hold the number of bytes used | ||
| 82 | void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height, | ||
| 83 | u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x, | ||
| 84 | u32 origin_y, u8* output, const u8* input); | ||
| 85 | |||
| 86 | void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | ||
| 87 | std::size_t copy_size, const u8* source_data, u8* swizzle_data); | ||
| 88 | 67 | ||
| 89 | /// Obtains the offset of the gob for positions 'dst_x' & 'dst_y' | 68 | /// Obtains the offset of the gob for positions 'dst_x' & 'dst_y' |
| 90 | u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | 69 | u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, |