summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Fernando Sahmkow, 2019-04-23 12:41:55 -0400
committer: FernandoS27, 2019-04-23 15:28:18 -0400
commit: b3118ee316863e1f4a35548f69bc1194bb740627 (patch)
tree: 59118dae69b2e490b7b7856af8ae954d9cae9891
parent: Add Swizzle Parameters to the DMA engine (diff)
download: yuzu-b3118ee316863e1f4a35548f69bc1194bb740627.tar.gz
yuzu-b3118ee316863e1f4a35548f69bc1194bb740627.tar.xz
yuzu-b3118ee316863e1f4a35548f69bc1194bb740627.zip
Fixes and Corrections to DMA Engine
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp 83
-rw-r--r-- src/video_core/engines/maxwell_dma.h 11
2 files changed, 57 insertions, 37 deletions
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 2426d0067..3a5dfef0c 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -83,57 +83,66 @@ void MaxwellDMA::HandleCopy() {
83 83
84 ASSERT(regs.exec.enable_2d == 1); 84 ASSERT(regs.exec.enable_2d == 1);
85 85
86 const std::size_t copy_size = regs.x_count * regs.y_count; 86 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
87 ASSERT(regs.src_params.size_z == 1);
88 // If the input is tiled and the output is linear, deswizzle the input and copy it over.
89 const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
90 const std::size_t src_size = Texture::CalculateSize(
91 true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
92 regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
87 93
88 auto source_ptr{memory_manager.GetPointer(source)}; 94 const std::size_t dst_size = regs.dst_pitch * regs.y_count;
89 auto dst_ptr{memory_manager.GetPointer(dest)};
90 95
91 if (!source_ptr) { 96 if (read_buffer.size() < src_size) {
92 LOG_ERROR(HW_GPU, "source_ptr is invalid"); 97 read_buffer.resize(src_size);
93 return; 98 }
94 }
95 99
96 if (!dst_ptr) { 100 if (write_buffer.size() < dst_size) {
97 LOG_ERROR(HW_GPU, "dst_ptr is invalid"); 101 write_buffer.resize(dst_size);
98 return; 102 }
99 }
100 103
101 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { 104 memory_manager.ReadBlock(source, read_buffer.data(), src_size);
102 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated 105 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
103 // copying.
104 rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
105 106
106 // We have to invalidate the destination region to evict any outdated surfaces from the 107 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
107 // cache. We do this before actually writing the new data because the destination address 108 regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(),
108 // might contain a dirty surface that will have to be written back to memory. 109 write_buffer.data(), regs.src_params.BlockHeight(),
109 rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size); 110 regs.src_params.pos_x, regs.src_params.pos_y);
110 };
111 111
112 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { 112 memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
113 ASSERT(regs.src_params.size_z == 1); 113 } else {
114 // If the input is tiled and the output is linear, deswizzle the input and copy it over. 114 ASSERT(regs.dst_params.BlockDepth() == 1);
115 115
116 const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; 116 const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count;
117 117
118 FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y, 118 const std::size_t dst_size = Texture::CalculateSize(
119 copy_size * src_bytes_per_pixel); 119 true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
120 regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
120 121
121 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, 122 const std::size_t dst_layer_size = Texture::CalculateSize(
122 regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr, 123 true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
123 regs.src_params.BlockHeight(), regs.src_params.pos_x, 124 regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
124 regs.src_params.pos_y);
125 } else {
126 ASSERT(regs.dst_params.size_z == 1);
127 ASSERT(regs.src_pitch == regs.x_count);
128 125
129 const u32 src_bpp = regs.src_pitch / regs.x_count; 126 const std::size_t src_size = regs.src_pitch * regs.y_count;
130 127
131 FlushAndInvalidate(regs.src_pitch * regs.y_count, 128 if (read_buffer.size() < src_size) {
132 regs.dst_params.size_x * regs.dst_params.size_y * src_bpp); 129 read_buffer.resize(src_size);
130 }
131
132 if (write_buffer.size() < dst_size) {
133 write_buffer.resize(dst_size);
134 }
135
136 memory_manager.ReadBlock(source, read_buffer.data(), src_size);
137 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
133 138
134 // If the input is linear and the output is tiled, swizzle the input and copy it over. 139 // If the input is linear and the output is tiled, swizzle the input and copy it over.
135 Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, 140 Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
136 src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight()); 141 src_bytes_per_pixel,
142 write_buffer.data() + dst_layer_size * regs.dst_params.pos_z,
143 read_buffer.data(), regs.dst_params.BlockHeight());
144
145 memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
137 } 146 }
138} 147}
139 148
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 0b2e26199..8eab1332e 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -6,6 +6,7 @@
6 6
7#include <array> 7#include <array>
8#include <cstddef> 8#include <cstddef>
9#include <vector>
9#include "common/bit_field.h" 10#include "common/bit_field.h"
10#include "common/common_funcs.h" 11#include "common/common_funcs.h"
11#include "common/common_types.h" 12#include "common/common_types.h"
@@ -155,6 +156,13 @@ public:
155 BitField<16, 2, u32> component_size; 156 BitField<16, 2, u32> component_size;
156 BitField<20, 3, u32> src_num_components; 157 BitField<20, 3, u32> src_num_components;
157 BitField<24, 3, u32> dst_num_components; 158 BitField<24, 3, u32> dst_num_components;
159
160 u32 SrcBytePerPixel() const {
161 return src_num_components.Value() * component_size.Value();
162 }
163 u32 DstBytePerPixel() const {
164 return dst_num_components.Value() * component_size.Value();
165 }
158 } swizzle_config; 166 } swizzle_config;
159 167
160 Parameters dst_params; 168 Parameters dst_params;
@@ -176,6 +184,9 @@ private:
176 184
177 MemoryManager& memory_manager; 185 MemoryManager& memory_manager;
178 186
187 std::vector<u8> read_buffer;
188 std::vector<u8> write_buffer;
189
179 /// Performs the copy from the source buffer to the destination buffer as configured in the 190 /// Performs the copy from the source buffer to the destination buffer as configured in the
180 /// registers. 191 /// registers.
181 void HandleCopy(); 192 void HandleCopy();