diff options
| author | 2019-04-23 12:41:55 -0400 | |
|---|---|---|
| committer | 2019-04-23 15:28:18 -0400 | |
| commit | b3118ee316863e1f4a35548f69bc1194bb740627 (patch) | |
| tree | 59118dae69b2e490b7b7856af8ae954d9cae9891 | |
| parent | Add Swizzle Parameters to the DMA engine (diff) | |
| download | yuzu-b3118ee316863e1f4a35548f69bc1194bb740627.tar.gz yuzu-b3118ee316863e1f4a35548f69bc1194bb740627.tar.xz yuzu-b3118ee316863e1f4a35548f69bc1194bb740627.zip | |
Fixes and Corrections to DMA Engine
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 83 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.h | 11 |
2 files changed, 57 insertions, 37 deletions
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 2426d0067..3a5dfef0c 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -83,57 +83,66 @@ void MaxwellDMA::HandleCopy() { | |||
| 83 | 83 | ||
| 84 | ASSERT(regs.exec.enable_2d == 1); | 84 | ASSERT(regs.exec.enable_2d == 1); |
| 85 | 85 | ||
| 86 | const std::size_t copy_size = regs.x_count * regs.y_count; | 86 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { |
| 87 | ASSERT(regs.src_params.size_z == 1); | ||
| 88 | // If the input is tiled and the output is linear, deswizzle the input and copy it over. | ||
| 89 | const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; | ||
| 90 | const std::size_t src_size = Texture::CalculateSize( | ||
| 91 | true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, | ||
| 92 | regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); | ||
| 87 | 93 | ||
| 88 | auto source_ptr{memory_manager.GetPointer(source)}; | 94 | const std::size_t dst_size = regs.dst_pitch * regs.y_count; |
| 89 | auto dst_ptr{memory_manager.GetPointer(dest)}; | ||
| 90 | 95 | ||
| 91 | if (!source_ptr) { | 96 | if (read_buffer.size() < src_size) { |
| 92 | LOG_ERROR(HW_GPU, "source_ptr is invalid"); | 97 | read_buffer.resize(src_size); |
| 93 | return; | 98 | } |
| 94 | } | ||
| 95 | 99 | ||
| 96 | if (!dst_ptr) { | 100 | if (write_buffer.size() < dst_size) { |
| 97 | LOG_ERROR(HW_GPU, "dst_ptr is invalid"); | 101 | write_buffer.resize(dst_size); |
| 98 | return; | 102 | } |
| 99 | } | ||
| 100 | 103 | ||
| 101 | const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { | 104 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); |
| 102 | // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated | 105 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); |
| 103 | // copying. | ||
| 104 | rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size); | ||
| 105 | 106 | ||
| 106 | // We have to invalidate the destination region to evict any outdated surfaces from the | 107 | Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, |
| 107 | // cache. We do this before actually writing the new data because the destination address | 108 | regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(), |
| 108 | // might contain a dirty surface that will have to be written back to memory. | 109 | write_buffer.data(), regs.src_params.BlockHeight(), |
| 109 | rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size); | 110 | regs.src_params.pos_x, regs.src_params.pos_y); |
| 110 | }; | ||
| 111 | 111 | ||
| 112 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { | 112 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); |
| 113 | ASSERT(regs.src_params.size_z == 1); | 113 | } else { |
| 114 | // If the input is tiled and the output is linear, deswizzle the input and copy it over. | 114 | ASSERT(regs.dst_params.BlockDepth() == 1); |
| 115 | 115 | ||
| 116 | const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; | 116 | const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; |
| 117 | 117 | ||
| 118 | FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y, | 118 | const std::size_t dst_size = Texture::CalculateSize( |
| 119 | copy_size * src_bytes_per_pixel); | 119 | true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, |
| 120 | regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); | ||
| 120 | 121 | ||
| 121 | Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, | 122 | const std::size_t dst_layer_size = Texture::CalculateSize( |
| 122 | regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr, | 123 | true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1, |
| 123 | regs.src_params.BlockHeight(), regs.src_params.pos_x, | 124 | regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); |
| 124 | regs.src_params.pos_y); | ||
| 125 | } else { | ||
| 126 | ASSERT(regs.dst_params.size_z == 1); | ||
| 127 | ASSERT(regs.src_pitch == regs.x_count); | ||
| 128 | 125 | ||
| 129 | const u32 src_bpp = regs.src_pitch / regs.x_count; | 126 | const std::size_t src_size = regs.src_pitch * regs.y_count; |
| 130 | 127 | ||
| 131 | FlushAndInvalidate(regs.src_pitch * regs.y_count, | 128 | if (read_buffer.size() < src_size) { |
| 132 | regs.dst_params.size_x * regs.dst_params.size_y * src_bpp); | 129 | read_buffer.resize(src_size); |
| 130 | } | ||
| 131 | |||
| 132 | if (write_buffer.size() < dst_size) { | ||
| 133 | write_buffer.resize(dst_size); | ||
| 134 | } | ||
| 135 | |||
| 136 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); | ||
| 137 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | ||
| 133 | 138 | ||
| 134 | // If the input is linear and the output is tiled, swizzle the input and copy it over. | 139 | // If the input is linear and the output is tiled, swizzle the input and copy it over. |
| 135 | Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, | 140 | Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, |
| 136 | src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight()); | 141 | src_bytes_per_pixel, |
| 142 | write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, | ||
| 143 | read_buffer.data(), regs.dst_params.BlockHeight()); | ||
| 144 | |||
| 145 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); | ||
| 137 | } | 146 | } |
| 138 | } | 147 | } |
| 139 | 148 | ||
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 0b2e26199..8eab1332e 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstddef> | 8 | #include <cstddef> |
| 9 | #include <vector> | ||
| 9 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 10 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| 11 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| @@ -155,6 +156,13 @@ public: | |||
| 155 | BitField<16, 2, u32> component_size; | 156 | BitField<16, 2, u32> component_size; |
| 156 | BitField<20, 3, u32> src_num_components; | 157 | BitField<20, 3, u32> src_num_components; |
| 157 | BitField<24, 3, u32> dst_num_components; | 158 | BitField<24, 3, u32> dst_num_components; |
| 159 | |||
| 160 | u32 SrcBytePerPixel() const { | ||
| 161 | return src_num_components.Value() * component_size.Value(); | ||
| 162 | } | ||
| 163 | u32 DstBytePerPixel() const { | ||
| 164 | return dst_num_components.Value() * component_size.Value(); | ||
| 165 | } | ||
| 158 | } swizzle_config; | 166 | } swizzle_config; |
| 159 | 167 | ||
| 160 | Parameters dst_params; | 168 | Parameters dst_params; |
| @@ -176,6 +184,9 @@ private: | |||
| 176 | 184 | ||
| 177 | MemoryManager& memory_manager; | 185 | MemoryManager& memory_manager; |
| 178 | 186 | ||
| 187 | std::vector<u8> read_buffer; | ||
| 188 | std::vector<u8> write_buffer; | ||
| 189 | |||
| 179 | /// Performs the copy from the source buffer to the destination buffer as configured in the | 190 | /// Performs the copy from the source buffer to the destination buffer as configured in the |
| 180 | /// registers. | 191 | /// registers. |
| 181 | void HandleCopy(); | 192 | void HandleCopy(); |