diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 294 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.h | 348 |
2 files changed, 355 insertions, 287 deletions
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 01d7df405..28faad9ff 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -14,50 +14,44 @@ | |||
| 14 | 14 | ||
| 15 | namespace Tegra::Engines { | 15 | namespace Tegra::Engines { |
| 16 | 16 | ||
| 17 | using namespace Texture; | ||
| 18 | |||
| 17 | MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager) | 19 | MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager) |
| 18 | : system{system}, memory_manager{memory_manager} {} | 20 | : system{system}, memory_manager{memory_manager} {} |
| 19 | 21 | ||
| 20 | void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | 22 | void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) { |
| 21 | ASSERT_MSG(method < Regs::NUM_REGS, | 23 | ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register"); |
| 22 | "Invalid MaxwellDMA register, increase the size of the Regs structure"); | ||
| 23 | 24 | ||
| 24 | regs.reg_array[method] = method_argument; | 25 | regs.reg_array[method] = method_argument; |
| 25 | 26 | ||
| 26 | #define MAXWELLDMA_REG_INDEX(field_name) \ | 27 | if (method == offsetof(Regs, launch_dma) / sizeof(u32)) { |
| 27 | (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32)) | 28 | Launch(); |
| 28 | |||
| 29 | switch (method) { | ||
| 30 | case MAXWELLDMA_REG_INDEX(exec): { | ||
| 31 | HandleCopy(); | ||
| 32 | break; | ||
| 33 | } | ||
| 34 | } | 29 | } |
| 35 | |||
| 36 | #undef MAXWELLDMA_REG_INDEX | ||
| 37 | } | 30 | } |
| 38 | 31 | ||
| 39 | void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | 32 | void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount, |
| 40 | u32 methods_pending) { | 33 | u32 methods_pending) { |
| 41 | for (std::size_t i = 0; i < amount; i++) { | 34 | for (size_t i = 0; i < amount; ++i) { |
| 42 | CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); | 35 | CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); |
| 43 | } | 36 | } |
| 44 | } | 37 | } |
| 45 | 38 | ||
| 46 | void MaxwellDMA::HandleCopy() { | 39 | void MaxwellDMA::Launch() { |
| 47 | LOG_TRACE(HW_GPU, "Requested a DMA copy"); | 40 | LOG_TRACE(HW_GPU, "Requested a DMA copy"); |
| 48 | 41 | ||
| 49 | const GPUVAddr source = regs.src_address.Address(); | ||
| 50 | const GPUVAddr dest = regs.dst_address.Address(); | ||
| 51 | |||
| 52 | // TODO(Subv): Perform more research and implement all features of this engine. | 42 | // TODO(Subv): Perform more research and implement all features of this engine. |
| 53 | ASSERT(regs.exec.enable_swizzle == 0); | 43 | const LaunchDMA& launch = regs.launch_dma; |
| 54 | ASSERT(regs.exec.query_mode == Regs::QueryMode::None); | 44 | ASSERT(launch.remap_enable == 0); |
| 55 | ASSERT(regs.exec.query_intr == Regs::QueryIntr::None); | 45 | ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE); |
| 56 | ASSERT(regs.exec.copy_mode == Regs::CopyMode::Unk2); | 46 | ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); |
| 57 | ASSERT(regs.dst_params.pos_x == 0); | 47 | ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); |
| 58 | ASSERT(regs.dst_params.pos_y == 0); | 48 | ASSERT(regs.dst_params.origin.x == 0); |
| 59 | 49 | ASSERT(regs.dst_params.origin.y == 0); | |
| 60 | if (!regs.exec.is_dst_linear && !regs.exec.is_src_linear) { | 50 | |
| 51 | const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; | ||
| 52 | const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; | ||
| 53 | |||
| 54 | if (!is_src_pitch && !is_dst_pitch) { | ||
| 61 | // If both the source and the destination are in block layout, assert. | 55 | // If both the source and the destination are in block layout, assert. |
| 62 | UNREACHABLE_MSG("Tiled->Tiled DMA transfers are not yet implemented"); | 56 | UNREACHABLE_MSG("Tiled->Tiled DMA transfers are not yet implemented"); |
| 63 | return; | 57 | return; |
| @@ -66,144 +60,158 @@ void MaxwellDMA::HandleCopy() { | |||
| 66 | // All copies here update the main memory, so mark all rasterizer states as invalid. | 60 | // All copies here update the main memory, so mark all rasterizer states as invalid. |
| 67 | system.GPU().Maxwell3D().OnMemoryWrite(); | 61 | system.GPU().Maxwell3D().OnMemoryWrite(); |
| 68 | 62 | ||
| 69 | if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { | 63 | if (is_src_pitch && is_dst_pitch) { |
| 70 | // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D | 64 | CopyPitchToPitch(); |
| 71 | // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, | 65 | } else { |
| 72 | // y_count). | 66 | ASSERT(launch.multi_line_enable == 1); |
| 73 | if (!regs.exec.enable_2d) { | ||
| 74 | memory_manager.CopyBlock(dest, source, regs.x_count); | ||
| 75 | return; | ||
| 76 | } | ||
| 77 | 67 | ||
| 78 | // If both the source and the destination are in linear layout, perform a line-by-line | 68 | if (!is_src_pitch && is_dst_pitch) { |
| 79 | // copy. We're going to take a subrect of size (x_count, y_count) from the source | 69 | CopyBlockLinearToPitch(); |
| 80 | // rectangle. There is no need to manually flush/invalidate the regions because | 70 | } else { |
| 81 | // CopyBlock does that for us. | 71 | CopyPitchToBlockLinear(); |
| 82 | for (u32 line = 0; line < regs.y_count; ++line) { | ||
| 83 | const GPUVAddr source_line = source + line * regs.src_pitch; | ||
| 84 | const GPUVAddr dest_line = dest + line * regs.dst_pitch; | ||
| 85 | memory_manager.CopyBlock(dest_line, source_line, regs.x_count); | ||
| 86 | } | 72 | } |
| 87 | return; | ||
| 88 | } | 73 | } |
| 74 | } | ||
| 89 | 75 | ||
| 90 | ASSERT(regs.exec.enable_2d == 1); | 76 | void MaxwellDMA::CopyPitchToPitch() { |
| 91 | 77 | // When `multi_line_enable` bit is disabled the copy is performed as if we were copying a 1D | |
| 92 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { | 78 | // buffer of length `line_length_in`. |
| 93 | 79 | // Otherwise we copy a 2D image of dimensions (line_length_in, line_count). | |
| 94 | ASSERT(regs.src_params.BlockDepth() == 0); | 80 | if (!regs.launch_dma.multi_line_enable) { |
| 95 | // Optimized path for micro copies. | 81 | memory_manager.CopyBlock(regs.offset_out, regs.offset_in, regs.line_length_in); |
| 96 | if (regs.dst_pitch * regs.y_count < Texture::GetGOBSize() && regs.dst_pitch <= 64) { | 82 | return; |
| 97 | const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; | 83 | } |
| 98 | const std::size_t src_size = Texture::GetGOBSize(); | ||
| 99 | const std::size_t dst_size = regs.dst_pitch * regs.y_count; | ||
| 100 | u32 pos_x = regs.src_params.pos_x; | ||
| 101 | u32 pos_y = regs.src_params.pos_y; | ||
| 102 | const u64 offset = | ||
| 103 | Texture::GetGOBOffset(regs.src_params.size_x, regs.src_params.size_y, pos_x, pos_y, | ||
| 104 | regs.src_params.BlockDepth(), bytes_per_pixel); | ||
| 105 | const u32 x_in_gob = 64 / bytes_per_pixel; | ||
| 106 | pos_x = pos_x % x_in_gob; | ||
| 107 | pos_y = pos_y % 8; | ||
| 108 | |||
| 109 | if (read_buffer.size() < src_size) { | ||
| 110 | read_buffer.resize(src_size); | ||
| 111 | } | ||
| 112 | |||
| 113 | if (write_buffer.size() < dst_size) { | ||
| 114 | write_buffer.resize(dst_size); | ||
| 115 | } | ||
| 116 | |||
| 117 | if (Settings::IsGPULevelExtreme()) { | ||
| 118 | memory_manager.ReadBlock(source + offset, read_buffer.data(), src_size); | ||
| 119 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | ||
| 120 | } else { | ||
| 121 | memory_manager.ReadBlockUnsafe(source + offset, read_buffer.data(), src_size); | ||
| 122 | memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size); | ||
| 123 | } | ||
| 124 | |||
| 125 | Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, | ||
| 126 | regs.src_params.size_x, bytes_per_pixel, read_buffer.data(), | ||
| 127 | write_buffer.data(), regs.src_params.BlockHeight(), pos_x, | ||
| 128 | pos_y); | ||
| 129 | |||
| 130 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); | ||
| 131 | |||
| 132 | return; | ||
| 133 | } | ||
| 134 | // If the input is tiled and the output is linear, deswizzle the input and copy it over. | ||
| 135 | const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; | ||
| 136 | const std::size_t src_size = Texture::CalculateSize( | ||
| 137 | true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, | ||
| 138 | regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); | ||
| 139 | |||
| 140 | const std::size_t src_layer_size = Texture::CalculateSize( | ||
| 141 | true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, 1, | ||
| 142 | regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); | ||
| 143 | |||
| 144 | const std::size_t dst_size = regs.dst_pitch * regs.y_count; | ||
| 145 | 84 | ||
| 146 | if (read_buffer.size() < src_size) { | 85 | // Perform a line-by-line copy. |
| 147 | read_buffer.resize(src_size); | 86 | // We're going to take a subrect of size (line_length_in, line_count) from the source rectangle. |
| 148 | } | 87 | // There is no need to manually flush/invalidate the regions because CopyBlock does that for us. |
| 88 | for (u32 line = 0; line < regs.line_count; ++line) { | ||
| 89 | const GPUVAddr source_line = regs.offset_in + static_cast<size_t>(line) * regs.pitch_in; | ||
| 90 | const GPUVAddr dest_line = regs.offset_out + static_cast<size_t>(line) * regs.pitch_out; | ||
| 91 | memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in); | ||
| 92 | } | ||
| 93 | } | ||
| 149 | 94 | ||
| 150 | if (write_buffer.size() < dst_size) { | 95 | void MaxwellDMA::CopyBlockLinearToPitch() { |
| 151 | write_buffer.resize(dst_size); | 96 | ASSERT(regs.src_params.block_size.depth == 0); |
| 152 | } | ||
| 153 | 97 | ||
| 154 | if (Settings::IsGPULevelExtreme()) { | 98 | // Optimized path for micro copies. |
| 155 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); | 99 | const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; |
| 156 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | 100 | if (dst_size < GetGOBSize() && regs.pitch_out <= 64) { |
| 157 | } else { | 101 | FastCopyBlockLinearToPitch(); |
| 158 | memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size); | 102 | return; |
| 159 | memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size); | 103 | } |
| 160 | } | ||
| 161 | 104 | ||
| 162 | Texture::UnswizzleSubrect( | 105 | // Deswizzle the input and copy it over. |
| 163 | regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel, | 106 | const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in; |
| 164 | read_buffer.data() + src_layer_size * regs.src_params.pos_z, write_buffer.data(), | 107 | const Parameters& src_params = regs.src_params; |
| 165 | regs.src_params.BlockHeight(), regs.src_params.pos_x, regs.src_params.pos_y); | 108 | const u32 width = src_params.width; |
| 109 | const u32 height = src_params.height; | ||
| 110 | const u32 depth = src_params.depth; | ||
| 111 | const u32 block_height = src_params.block_size.height; | ||
| 112 | const u32 block_depth = src_params.block_size.depth; | ||
| 113 | const size_t src_size = | ||
| 114 | CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); | ||
| 115 | const size_t src_layer_size = | ||
| 116 | CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth); | ||
| 117 | |||
| 118 | if (read_buffer.size() < src_size) { | ||
| 119 | read_buffer.resize(src_size); | ||
| 120 | } | ||
| 121 | if (write_buffer.size() < dst_size) { | ||
| 122 | write_buffer.resize(dst_size); | ||
| 123 | } | ||
| 166 | 124 | ||
| 167 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); | 125 | if (Settings::IsGPULevelExtreme()) { |
| 126 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); | ||
| 127 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); | ||
| 168 | } else { | 128 | } else { |
| 169 | ASSERT(regs.dst_params.BlockDepth() == 0); | 129 | memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size); |
| 130 | memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); | ||
| 131 | } | ||
| 170 | 132 | ||
| 171 | const u32 bytes_per_pixel = regs.src_pitch / regs.x_count; | 133 | UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, src_params.width, |
| 134 | bytes_per_pixel, read_buffer.data() + src_layer_size * src_params.layer, | ||
| 135 | write_buffer.data(), src_params.block_size.height, src_params.origin.x, | ||
| 136 | src_params.origin.y); | ||
| 172 | 137 | ||
| 173 | const std::size_t dst_size = Texture::CalculateSize( | 138 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); |
| 174 | true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, | 139 | } |
| 175 | regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); | ||
| 176 | 140 | ||
| 177 | const std::size_t dst_layer_size = Texture::CalculateSize( | 141 | void MaxwellDMA::CopyPitchToBlockLinear() { |
| 178 | true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1, | 142 | const auto& dst_params = regs.dst_params; |
| 179 | regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); | 143 | ASSERT(dst_params.block_size.depth == 0); |
| 144 | |||
| 145 | const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; | ||
| 146 | const u32 width = dst_params.width; | ||
| 147 | const u32 height = dst_params.height; | ||
| 148 | const u32 depth = dst_params.depth; | ||
| 149 | const u32 block_height = dst_params.block_size.height; | ||
| 150 | const u32 block_depth = dst_params.block_size.depth; | ||
| 151 | const size_t dst_size = | ||
| 152 | CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); | ||
| 153 | const size_t dst_layer_size = | ||
| 154 | CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth); | ||
| 155 | |||
| 156 | const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count; | ||
| 157 | |||
| 158 | if (read_buffer.size() < src_size) { | ||
| 159 | read_buffer.resize(src_size); | ||
| 160 | } | ||
| 161 | if (write_buffer.size() < dst_size) { | ||
| 162 | write_buffer.resize(dst_size); | ||
| 163 | } | ||
| 180 | 164 | ||
| 181 | const std::size_t src_size = regs.src_pitch * regs.y_count; | 165 | if (Settings::IsGPULevelExtreme()) { |
| 166 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); | ||
| 167 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); | ||
| 168 | } else { | ||
| 169 | memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size); | ||
| 170 | memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); | ||
| 171 | } | ||
| 182 | 172 | ||
| 183 | if (read_buffer.size() < src_size) { | 173 | // If the input is linear and the output is tiled, swizzle the input and copy it over. |
| 184 | read_buffer.resize(src_size); | 174 | SwizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_in, dst_params.width, |
| 185 | } | 175 | bytes_per_pixel, write_buffer.data() + dst_layer_size * dst_params.layer, |
| 176 | read_buffer.data(), dst_params.block_size.height, dst_params.origin.x, | ||
| 177 | dst_params.origin.y); | ||
| 186 | 178 | ||
| 187 | if (write_buffer.size() < dst_size) { | 179 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); |
| 188 | write_buffer.resize(dst_size); | 180 | } |
| 189 | } | ||
| 190 | 181 | ||
| 191 | if (Settings::IsGPULevelExtreme()) { | 182 | void MaxwellDMA::FastCopyBlockLinearToPitch() { |
| 192 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); | 183 | const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in; |
| 193 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | 184 | const size_t src_size = GetGOBSize(); |
| 194 | } else { | 185 | const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; |
| 195 | memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size); | 186 | u32 pos_x = regs.src_params.origin.x; |
| 196 | memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size); | 187 | u32 pos_y = regs.src_params.origin.y; |
| 197 | } | 188 | const u64 offset = GetGOBOffset(regs.src_params.width, regs.src_params.height, pos_x, pos_y, |
| 189 | regs.src_params.block_size.height, bytes_per_pixel); | ||
| 190 | const u32 x_in_gob = 64 / bytes_per_pixel; | ||
| 191 | pos_x = pos_x % x_in_gob; | ||
| 192 | pos_y = pos_y % 8; | ||
| 193 | |||
| 194 | if (read_buffer.size() < src_size) { | ||
| 195 | read_buffer.resize(src_size); | ||
| 196 | } | ||
| 198 | 197 | ||
| 199 | // If the input is linear and the output is tiled, swizzle the input and copy it over. | 198 | if (write_buffer.size() < dst_size) { |
| 200 | Texture::SwizzleSubrect( | 199 | write_buffer.resize(dst_size); |
| 201 | regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, bytes_per_pixel, | 200 | } |
| 202 | write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, read_buffer.data(), | ||
| 203 | regs.dst_params.BlockHeight(), regs.dst_params.pos_x, regs.dst_params.pos_y); | ||
| 204 | 201 | ||
| 205 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); | 202 | if (Settings::IsGPULevelExtreme()) { |
| 203 | memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), src_size); | ||
| 204 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); | ||
| 205 | } else { | ||
| 206 | memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(), src_size); | ||
| 207 | memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); | ||
| 206 | } | 208 | } |
| 209 | |||
| 210 | UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, regs.src_params.width, | ||
| 211 | bytes_per_pixel, read_buffer.data(), write_buffer.data(), | ||
| 212 | regs.src_params.block_size.height, pos_x, pos_y); | ||
| 213 | |||
| 214 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); | ||
| 207 | } | 215 | } |
| 208 | 216 | ||
| 209 | } // namespace Tegra::Engines | 217 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 502dd8509..50f445efc 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h | |||
| @@ -24,160 +24,190 @@ class MemoryManager; | |||
| 24 | namespace Tegra::Engines { | 24 | namespace Tegra::Engines { |
| 25 | 25 | ||
| 26 | /** | 26 | /** |
| 27 | * This Engine is known as GK104_Copy. Documentation can be found in: | 27 | * This engine is known as gk104_copy. Documentation can be found in: |
| 28 | * https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h | ||
| 28 | * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml | 29 | * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml |
| 29 | */ | 30 | */ |
| 30 | 31 | ||
| 31 | class MaxwellDMA final : public EngineInterface { | 32 | class MaxwellDMA final : public EngineInterface { |
| 32 | public: | 33 | public: |
| 33 | explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager); | 34 | struct PackedGPUVAddr { |
| 34 | ~MaxwellDMA() = default; | 35 | u32 upper; |
| 35 | 36 | u32 lower; | |
| 36 | /// Write the value to the register identified by method. | 37 | |
| 37 | void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; | 38 | constexpr operator GPUVAddr() const noexcept { |
| 38 | 39 | return (static_cast<GPUVAddr>(upper & 0xff) << 32) | lower; | |
| 39 | /// Write multiple values to the register identified by method. | 40 | } |
| 40 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, | 41 | }; |
| 41 | u32 methods_pending) override; | 42 | |
| 43 | union BlockSize { | ||
| 44 | BitField<0, 4, u32> width; | ||
| 45 | BitField<4, 4, u32> height; | ||
| 46 | BitField<8, 4, u32> depth; | ||
| 47 | BitField<12, 4, u32> gob_height; | ||
| 48 | }; | ||
| 49 | static_assert(sizeof(BlockSize) == 4); | ||
| 50 | |||
| 51 | union Origin { | ||
| 52 | BitField<0, 16, u32> x; | ||
| 53 | BitField<16, 16, u32> y; | ||
| 54 | }; | ||
| 55 | static_assert(sizeof(Origin) == 4); | ||
| 56 | |||
| 57 | struct Parameters { | ||
| 58 | BlockSize block_size; | ||
| 59 | u32 width; | ||
| 60 | u32 height; | ||
| 61 | u32 depth; | ||
| 62 | u32 layer; | ||
| 63 | Origin origin; | ||
| 64 | }; | ||
| 65 | static_assert(sizeof(Parameters) == 24); | ||
| 66 | |||
| 67 | struct Semaphore { | ||
| 68 | PackedGPUVAddr address; | ||
| 69 | u32 payload; | ||
| 70 | }; | ||
| 71 | static_assert(sizeof(Semaphore) == 12); | ||
| 72 | |||
| 73 | struct RenderEnable { | ||
| 74 | enum class Mode : u32 { | ||
| 75 | FALSE = 0, | ||
| 76 | TRUE = 1, | ||
| 77 | CONDITIONAL = 2, | ||
| 78 | RENDER_IF_EQUAL = 3, | ||
| 79 | RENDER_IF_NOT_EQUAL = 4, | ||
| 80 | }; | ||
| 42 | 81 | ||
| 43 | struct Regs { | 82 | PackedGPUVAddr address; |
| 44 | static constexpr std::size_t NUM_REGS = 0x1D6; | 83 | BitField<0, 3, Mode> mode; |
| 84 | }; | ||
| 85 | static_assert(sizeof(RenderEnable) == 12); | ||
| 86 | |||
| 87 | enum class PhysModeTarget : u32 { | ||
| 88 | LOCAL_FB = 0, | ||
| 89 | COHERENT_SYSMEM = 1, | ||
| 90 | NONCOHERENT_SYSMEM = 2, | ||
| 91 | }; | ||
| 92 | using PhysMode = BitField<0, 2, PhysModeTarget>; | ||
| 93 | |||
| 94 | union LaunchDMA { | ||
| 95 | enum class DataTransferType : u32 { | ||
| 96 | NONE = 0, | ||
| 97 | PIPELINED = 1, | ||
| 98 | NON_PIPELINED = 2, | ||
| 99 | }; | ||
| 45 | 100 | ||
| 46 | struct Parameters { | 101 | enum class SemaphoreType : u32 { |
| 47 | union { | 102 | NONE = 0, |
| 48 | BitField<0, 4, u32> block_depth; | 103 | RELEASE_ONE_WORD_SEMAPHORE = 1, |
| 49 | BitField<4, 4, u32> block_height; | 104 | RELEASE_FOUR_WORD_SEMAPHORE = 2, |
| 50 | BitField<8, 4, u32> block_width; | 105 | }; |
| 51 | }; | ||
| 52 | u32 size_x; | ||
| 53 | u32 size_y; | ||
| 54 | u32 size_z; | ||
| 55 | u32 pos_z; | ||
| 56 | union { | ||
| 57 | BitField<0, 16, u32> pos_x; | ||
| 58 | BitField<16, 16, u32> pos_y; | ||
| 59 | }; | ||
| 60 | 106 | ||
| 61 | u32 BlockHeight() const { | 107 | enum class InterruptType : u32 { |
| 62 | return block_height.Value(); | 108 | NONE = 0, |
| 63 | } | 109 | BLOCKING = 1, |
| 110 | NON_BLOCKING = 2, | ||
| 111 | }; | ||
| 64 | 112 | ||
| 65 | u32 BlockDepth() const { | 113 | enum class MemoryLayout : u32 { |
| 66 | return block_depth.Value(); | 114 | BLOCKLINEAR = 0, |
| 67 | } | 115 | PITCH = 1, |
| 68 | }; | 116 | }; |
| 69 | 117 | ||
| 70 | static_assert(sizeof(Parameters) == 24, "Parameters has wrong size"); | 118 | enum class Type : u32 { |
| 119 | VIRTUAL = 0, | ||
| 120 | PHYSICAL = 1, | ||
| 121 | }; | ||
| 71 | 122 | ||
| 72 | enum class ComponentMode : u32 { | 123 | enum class SemaphoreReduction : u32 { |
| 73 | Src0 = 0, | 124 | IMIN = 0, |
| 74 | Src1 = 1, | 125 | IMAX = 1, |
| 75 | Src2 = 2, | 126 | IXOR = 2, |
| 76 | Src3 = 3, | 127 | IAND = 3, |
| 77 | Const0 = 4, | 128 | IOR = 4, |
| 78 | Const1 = 5, | 129 | IADD = 5, |
| 79 | Zero = 6, | 130 | INC = 6, |
| 131 | DEC = 7, | ||
| 132 | FADD = 0xA, | ||
| 80 | }; | 133 | }; |
| 81 | 134 | ||
| 82 | enum class CopyMode : u32 { | 135 | enum class SemaphoreReductionSign : u32 { |
| 83 | None = 0, | 136 | SIGNED = 0, |
| 84 | Unk1 = 1, | 137 | UNSIGNED = 1, |
| 85 | Unk2 = 2, | ||
| 86 | }; | 138 | }; |
| 87 | 139 | ||
| 88 | enum class QueryMode : u32 { | 140 | enum class BypassL2 : u32 { |
| 89 | None = 0, | 141 | USE_PTE_SETTING = 0, |
| 90 | Short = 1, | 142 | FORCE_VOLATILE = 1, |
| 91 | Long = 2, | ||
| 92 | }; | 143 | }; |
| 93 | 144 | ||
| 94 | enum class QueryIntr : u32 { | 145 | BitField<0, 2, DataTransferType> data_transfer_type; |
| 95 | None = 0, | 146 | BitField<2, 1, u32> flush_enable; |
| 96 | Block = 1, | 147 | BitField<3, 2, SemaphoreType> semaphore_type; |
| 97 | NonBlock = 2, | 148 | BitField<5, 2, InterruptType> interrupt_type; |
| 149 | BitField<7, 1, MemoryLayout> src_memory_layout; | ||
| 150 | BitField<8, 1, MemoryLayout> dst_memory_layout; | ||
| 151 | BitField<9, 1, u32> multi_line_enable; | ||
| 152 | BitField<10, 1, u32> remap_enable; | ||
| 153 | BitField<11, 1, u32> rmwdisable; | ||
| 154 | BitField<12, 1, Type> src_type; | ||
| 155 | BitField<13, 1, Type> dst_type; | ||
| 156 | BitField<14, 4, SemaphoreReduction> semaphore_reduction; | ||
| 157 | BitField<18, 1, SemaphoreReductionSign> semaphore_reduction_sign; | ||
| 158 | BitField<19, 1, u32> reduction_enable; | ||
| 159 | BitField<20, 1, BypassL2> bypass_l2; | ||
| 160 | }; | ||
| 161 | static_assert(sizeof(LaunchDMA) == 4); | ||
| 162 | |||
| 163 | struct RemapConst { | ||
| 164 | enum Swizzle : u32 { | ||
| 165 | SRC_X = 0, | ||
| 166 | SRC_Y = 1, | ||
| 167 | SRC_Z = 2, | ||
| 168 | SRC_W = 3, | ||
| 169 | CONST_A = 4, | ||
| 170 | CONST_B = 5, | ||
| 171 | NO_WRITE = 6, | ||
| 98 | }; | 172 | }; |
| 99 | 173 | ||
| 100 | union { | 174 | PackedGPUVAddr address; |
| 101 | struct { | ||
| 102 | INSERT_UNION_PADDING_WORDS(0xC0); | ||
| 103 | |||
| 104 | struct { | ||
| 105 | union { | ||
| 106 | BitField<0, 2, CopyMode> copy_mode; | ||
| 107 | BitField<2, 1, u32> flush; | ||
| 108 | |||
| 109 | BitField<3, 2, QueryMode> query_mode; | ||
| 110 | BitField<5, 2, QueryIntr> query_intr; | ||
| 111 | |||
| 112 | BitField<7, 1, u32> is_src_linear; | ||
| 113 | BitField<8, 1, u32> is_dst_linear; | ||
| 114 | |||
| 115 | BitField<9, 1, u32> enable_2d; | ||
| 116 | BitField<10, 1, u32> enable_swizzle; | ||
| 117 | }; | ||
| 118 | } exec; | ||
| 119 | |||
| 120 | INSERT_UNION_PADDING_WORDS(0x3F); | ||
| 121 | |||
| 122 | struct { | ||
| 123 | u32 address_high; | ||
| 124 | u32 address_low; | ||
| 125 | |||
| 126 | GPUVAddr Address() const { | ||
| 127 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 128 | address_low); | ||
| 129 | } | ||
| 130 | } src_address; | ||
| 131 | |||
| 132 | struct { | ||
| 133 | u32 address_high; | ||
| 134 | u32 address_low; | ||
| 135 | |||
| 136 | GPUVAddr Address() const { | ||
| 137 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 138 | address_low); | ||
| 139 | } | ||
| 140 | } dst_address; | ||
| 141 | |||
| 142 | u32 src_pitch; | ||
| 143 | u32 dst_pitch; | ||
| 144 | u32 x_count; | ||
| 145 | u32 y_count; | ||
| 146 | |||
| 147 | INSERT_UNION_PADDING_WORDS(0xB8); | ||
| 148 | |||
| 149 | u32 const0; | ||
| 150 | u32 const1; | ||
| 151 | union { | ||
| 152 | BitField<0, 4, ComponentMode> component0; | ||
| 153 | BitField<4, 4, ComponentMode> component1; | ||
| 154 | BitField<8, 4, ComponentMode> component2; | ||
| 155 | BitField<12, 4, ComponentMode> component3; | ||
| 156 | BitField<16, 2, u32> component_size; | ||
| 157 | BitField<20, 3, u32> src_num_components; | ||
| 158 | BitField<24, 3, u32> dst_num_components; | ||
| 159 | |||
| 160 | u32 SrcBytePerPixel() const { | ||
| 161 | return src_num_components.Value() * component_size.Value(); | ||
| 162 | } | ||
| 163 | u32 DstBytePerPixel() const { | ||
| 164 | return dst_num_components.Value() * component_size.Value(); | ||
| 165 | } | ||
| 166 | } swizzle_config; | ||
| 167 | 175 | ||
| 168 | Parameters dst_params; | 176 | union { |
| 177 | BitField<0, 3, Swizzle> dst_x; | ||
| 178 | BitField<4, 3, Swizzle> dst_y; | ||
| 179 | BitField<8, 3, Swizzle> dst_z; | ||
| 180 | BitField<12, 3, Swizzle> dst_w; | ||
| 181 | BitField<16, 2, u32> component_size_minus_one; | ||
| 182 | BitField<20, 2, u32> num_src_components_minus_one; | ||
| 183 | BitField<24, 2, u32> num_dst_components_minus_one; | ||
| 184 | }; | ||
| 185 | }; | ||
| 186 | static_assert(sizeof(RemapConst) == 12); | ||
| 169 | 187 | ||
| 170 | INSERT_UNION_PADDING_WORDS(1); | 188 | explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager); |
| 189 | ~MaxwellDMA() = default; | ||
| 171 | 190 | ||
| 172 | Parameters src_params; | 191 | /// Write the value to the register identified by method. |
| 192 | void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; | ||
| 173 | 193 | ||
| 174 | INSERT_UNION_PADDING_WORDS(0x13); | 194 | /// Write multiple values to the register identified by method. |
| 175 | }; | 195 | void CallMultiMethod(u32 method, const u32* base_start, u32 amount, |
| 176 | std::array<u32, NUM_REGS> reg_array; | 196 | u32 methods_pending) override; |
| 177 | }; | ||
| 178 | } regs{}; | ||
| 179 | 197 | ||
| 180 | private: | 198 | private: |
| 199 | /// Performs the copy from the source buffer to the destination buffer as configured in the | ||
| 200 | /// registers. | ||
| 201 | void Launch(); | ||
| 202 | |||
| 203 | void CopyPitchToPitch(); | ||
| 204 | |||
| 205 | void CopyBlockLinearToPitch(); | ||
| 206 | |||
| 207 | void CopyPitchToBlockLinear(); | ||
| 208 | |||
| 209 | void FastCopyBlockLinearToPitch(); | ||
| 210 | |||
| 181 | Core::System& system; | 211 | Core::System& system; |
| 182 | 212 | ||
| 183 | MemoryManager& memory_manager; | 213 | MemoryManager& memory_manager; |
| @@ -185,28 +215,58 @@ private: | |||
| 185 | std::vector<u8> read_buffer; | 215 | std::vector<u8> read_buffer; |
| 186 | std::vector<u8> write_buffer; | 216 | std::vector<u8> write_buffer; |
| 187 | 217 | ||
| 188 | /// Performs the copy from the source buffer to the destination buffer as configured in the | 218 | static constexpr std::size_t NUM_REGS = 0x800; |
| 189 | /// registers. | 219 | struct Regs { |
| 190 | void HandleCopy(); | 220 | union { |
| 191 | }; | 221 | struct { |
| 222 | u32 reserved[0x40]; | ||
| 223 | u32 nop; | ||
| 224 | u32 reserved01[0xf]; | ||
| 225 | u32 pm_trigger; | ||
| 226 | u32 reserved02[0x3f]; | ||
| 227 | Semaphore semaphore; | ||
| 228 | u32 reserved03[0x2]; | ||
| 229 | RenderEnable render_enable; | ||
| 230 | PhysMode src_phys_mode; | ||
| 231 | PhysMode dst_phys_mode; | ||
| 232 | u32 reserved04[0x26]; | ||
| 233 | LaunchDMA launch_dma; | ||
| 234 | u32 reserved05[0x3f]; | ||
| 235 | PackedGPUVAddr offset_in; | ||
| 236 | PackedGPUVAddr offset_out; | ||
| 237 | u32 pitch_in; | ||
| 238 | u32 pitch_out; | ||
| 239 | u32 line_length_in; | ||
| 240 | u32 line_count; | ||
| 241 | u32 reserved06[0xb8]; | ||
| 242 | RemapConst remap_const; | ||
| 243 | Parameters dst_params; | ||
| 244 | u32 reserved07[0x1]; | ||
| 245 | Parameters src_params; | ||
| 246 | u32 reserved08[0x275]; | ||
| 247 | u32 pm_trigger_end; | ||
| 248 | u32 reserved09[0x3ba]; | ||
| 249 | }; | ||
| 250 | std::array<u32, NUM_REGS> reg_array; | ||
| 251 | }; | ||
| 252 | } regs{}; | ||
| 192 | 253 | ||
| 193 | #define ASSERT_REG_POSITION(field_name, position) \ | 254 | #define ASSERT_REG_POSITION(field_name, position) \ |
| 194 | static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4, \ | 255 | static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4, \ |
| 195 | "Field " #field_name " has invalid position") | 256 | "Field " #field_name " has invalid position") |
| 196 | 257 | ||
| 197 | ASSERT_REG_POSITION(exec, 0xC0); | 258 | ASSERT_REG_POSITION(launch_dma, 0xC0); |
| 198 | ASSERT_REG_POSITION(src_address, 0x100); | 259 | ASSERT_REG_POSITION(offset_in, 0x100); |
| 199 | ASSERT_REG_POSITION(dst_address, 0x102); | 260 | ASSERT_REG_POSITION(offset_out, 0x102); |
| 200 | ASSERT_REG_POSITION(src_pitch, 0x104); | 261 | ASSERT_REG_POSITION(pitch_in, 0x104); |
| 201 | ASSERT_REG_POSITION(dst_pitch, 0x105); | 262 | ASSERT_REG_POSITION(pitch_out, 0x105); |
| 202 | ASSERT_REG_POSITION(x_count, 0x106); | 263 | ASSERT_REG_POSITION(line_length_in, 0x106); |
| 203 | ASSERT_REG_POSITION(y_count, 0x107); | 264 | ASSERT_REG_POSITION(line_count, 0x107); |
| 204 | ASSERT_REG_POSITION(const0, 0x1C0); | 265 | ASSERT_REG_POSITION(remap_const, 0x1C0); |
| 205 | ASSERT_REG_POSITION(const1, 0x1C1); | 266 | ASSERT_REG_POSITION(dst_params, 0x1C3); |
| 206 | ASSERT_REG_POSITION(swizzle_config, 0x1C2); | 267 | ASSERT_REG_POSITION(src_params, 0x1CA); |
| 207 | ASSERT_REG_POSITION(dst_params, 0x1C3); | ||
| 208 | ASSERT_REG_POSITION(src_params, 0x1CA); | ||
| 209 | 268 | ||
| 210 | #undef ASSERT_REG_POSITION | 269 | #undef ASSERT_REG_POSITION |
| 270 | }; | ||
| 211 | 271 | ||
| 212 | } // namespace Tegra::Engines | 272 | } // namespace Tegra::Engines |