summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Subv2018-06-10 17:02:33 -0500
committerGravatar Subv2018-06-12 11:27:36 -0500
commit987a17066514dbab8e02acae20d4ea32c4f502eb (patch)
tree81b613108c9a3147bac98de15336dae217fa1be1 /src
parentMerge pull request #553 from Subv/iset (diff)
downloadyuzu-987a17066514dbab8e02acae20d4ea32c4f502eb.tar.gz
yuzu-987a17066514dbab8e02acae20d4ea32c4f502eb.tar.xz
yuzu-987a17066514dbab8e02acae20d4ea32c4f502eb.zip
GPU: Partially implemented the Maxwell DMA engine.
Only tiled->linear and linear->tiled copies without an offset are supported for now. Queries and swizzled copies are not supported.
Diffstat (limited to 'src')
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/command_processor.cpp6
-rw-r--r--src/video_core/engines/fermi_2d.cpp1
-rw-r--r--src/video_core/engines/maxwell_dma.cpp69
-rw-r--r--src/video_core/engines/maxwell_dma.h155
-rw-r--r--src/video_core/gpu.cpp2
-rw-r--r--src/video_core/gpu.h3
7 files changed, 237 insertions, 1 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 281810357..6e193e7e1 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -9,6 +9,8 @@ add_library(video_core STATIC
9 engines/maxwell_3d.h 9 engines/maxwell_3d.h
10 engines/maxwell_compute.cpp 10 engines/maxwell_compute.cpp
11 engines/maxwell_compute.h 11 engines/maxwell_compute.h
12 engines/maxwell_dma.cpp
13 engines/maxwell_dma.h
12 engines/shader_bytecode.h 14 engines/shader_bytecode.h
13 gpu.cpp 15 gpu.cpp
14 gpu.h 16 gpu.h
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index d72d6f760..cec9cb9f3 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -16,6 +16,7 @@
16#include "video_core/engines/fermi_2d.h" 16#include "video_core/engines/fermi_2d.h"
17#include "video_core/engines/maxwell_3d.h" 17#include "video_core/engines/maxwell_3d.h"
18#include "video_core/engines/maxwell_compute.h" 18#include "video_core/engines/maxwell_compute.h"
19#include "video_core/engines/maxwell_dma.h"
19#include "video_core/gpu.h" 20#include "video_core/gpu.h"
20#include "video_core/renderer_base.h" 21#include "video_core/renderer_base.h"
21#include "video_core/video_core.h" 22#include "video_core/video_core.h"
@@ -60,8 +61,11 @@ void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params)
60 case EngineID::MAXWELL_COMPUTE_B: 61 case EngineID::MAXWELL_COMPUTE_B:
61 maxwell_compute->WriteReg(method, value); 62 maxwell_compute->WriteReg(method, value);
62 break; 63 break;
64 case EngineID::MAXWELL_DMA_COPY_A:
65 maxwell_dma->WriteReg(method, value);
66 break;
63 default: 67 default:
64 UNIMPLEMENTED(); 68 UNIMPLEMENTED_MSG("Unimplemented engine");
65 } 69 }
66} 70}
67 71
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 6b9382f06..998b7c843 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -47,6 +47,7 @@ void Fermi2D::HandleSurfaceCopy() {
47 47
48 if (regs.src.linear == regs.dst.linear) { 48 if (regs.src.linear == regs.dst.linear) {
49 // If the input layout and the output layout are the same, just perform a raw copy. 49 // If the input layout and the output layout are the same, just perform a raw copy.
50 ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight());
50 Memory::CopyBlock(dest_cpu, source_cpu, 51 Memory::CopyBlock(dest_cpu, source_cpu,
51 src_bytes_per_pixel * regs.dst.width * regs.dst.height); 52 src_bytes_per_pixel * regs.dst.width * regs.dst.height);
52 return; 53 return;
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
new file mode 100644
index 000000000..442138988
--- /dev/null
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -0,0 +1,69 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/memory.h"
6#include "video_core/engines/maxwell_dma.h"
7#include "video_core/textures/decoders.h"
8
9namespace Tegra {
10namespace Engines {
11
12MaxwellDMA::MaxwellDMA(MemoryManager& memory_manager) : memory_manager(memory_manager) {}
13
14void MaxwellDMA::WriteReg(u32 method, u32 value) {
15 ASSERT_MSG(method < Regs::NUM_REGS,
16 "Invalid MaxwellDMA register, increase the size of the Regs structure");
17
18 regs.reg_array[method] = value;
19
20#define MAXWELLDMA_REG_INDEX(field_name) \
21 (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32))
22
23 switch (method) {
24 case MAXWELLDMA_REG_INDEX(exec): {
25 HandleCopy();
26 break;
27 }
28 }
29
30#undef MAXWELLDMA_REG_INDEX
31}
32
33void MaxwellDMA::HandleCopy() {
34 NGLOG_WARNING(HW_GPU, "Requested a DMA copy");
35
36 const GPUVAddr source = regs.src_address.Address();
37 const GPUVAddr dest = regs.dst_address.Address();
38
39 const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source);
40 const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest);
41
42 // TODO(Subv): Perform more research and implement all features of this engine.
43 ASSERT(regs.exec.enable_swizzle == 0);
44 ASSERT(regs.exec.enable_2d == 1);
45 ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
46 ASSERT(regs.exec.query_intr == Regs::QueryIntr::None);
47 ASSERT(regs.exec.copy_mode == Regs::CopyMode::Unk2);
48 ASSERT(regs.src_params.pos_x == 0);
49 ASSERT(regs.src_params.pos_y == 0);
50 ASSERT(regs.dst_params.pos_x == 0);
51 ASSERT(regs.dst_params.pos_y == 0);
52 ASSERT(regs.exec.is_dst_linear != regs.exec.is_src_linear);
53
54 u8* src_buffer = Memory::GetPointer(source_cpu);
55 u8* dst_buffer = Memory::GetPointer(dest_cpu);
56
57 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
58 // If the input is tiled and the output is linear, deswizzle the input and copy it over.
59 Texture::CopySwizzledData(regs.src_params.size_x, regs.src_params.size_y, 1, 1, src_buffer,
60 dst_buffer, true, regs.src_params.BlockHeight());
61 } else {
62 // If the input is linear and the output is tiled, swizzle the input and copy it over.
63 Texture::CopySwizzledData(regs.dst_params.size_x, regs.dst_params.size_y, 1, 1, dst_buffer,
64 src_buffer, false, regs.dst_params.BlockHeight());
65 }
66}
67
68} // namespace Engines
69} // namespace Tegra
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
new file mode 100644
index 000000000..905749bde
--- /dev/null
+++ b/src/video_core/engines/maxwell_dma.h
@@ -0,0 +1,155 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include "common/assert.h"
9#include "common/bit_field.h"
10#include "common/common_funcs.h"
11#include "common/common_types.h"
12#include "video_core/gpu.h"
13#include "video_core/memory_manager.h"
14
15namespace Tegra {
16namespace Engines {
17
18class MaxwellDMA final {
19public:
20 explicit MaxwellDMA(MemoryManager& memory_manager);
21 ~MaxwellDMA() = default;
22
23 /// Write the value to the register identified by method.
24 void WriteReg(u32 method, u32 value);
25
26 struct Regs {
27 static constexpr size_t NUM_REGS = 0x1D6;
28
29 struct Parameters {
30 union {
31 BitField<0, 4, u32> block_depth;
32 BitField<4, 4, u32> block_height;
33 BitField<8, 4, u32> block_width;
34 };
35 u32 size_x;
36 u32 size_y;
37 u32 size_z;
38 u32 pos_z;
39 union {
40 BitField<0, 16, u32> pos_x;
41 BitField<16, 16, u32> pos_y;
42 };
43
44 u32 BlockHeight() const {
45 return 1 << block_height;
46 }
47 };
48
49 static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
50
51 enum class CopyMode : u32 {
52 None = 0,
53 Unk1 = 1,
54 Unk2 = 2,
55 };
56
57 enum class QueryMode : u32 {
58 None = 0,
59 Short = 1,
60 Long = 2,
61 };
62
63 enum class QueryIntr : u32 {
64 None = 0,
65 Block = 1,
66 NonBlock = 2,
67 };
68
69 union {
70 struct {
71 INSERT_PADDING_WORDS(0xC0);
72
73 struct {
74 union {
75 BitField<0, 2, CopyMode> copy_mode;
76 BitField<2, 1, u32> flush;
77
78 BitField<3, 2, QueryMode> query_mode;
79 BitField<5, 2, QueryIntr> query_intr;
80
81 BitField<7, 1, u32> is_src_linear;
82 BitField<8, 1, u32> is_dst_linear;
83
84 BitField<9, 1, u32> enable_2d;
85 BitField<10, 1, u32> enable_swizzle;
86 };
87 } exec;
88
89 INSERT_PADDING_WORDS(0x3F);
90
91 struct {
92 u32 address_high;
93 u32 address_low;
94
95 GPUVAddr Address() const {
96 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
97 address_low);
98 }
99 } src_address;
100
101 struct {
102 u32 address_high;
103 u32 address_low;
104
105 GPUVAddr Address() const {
106 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
107 address_low);
108 }
109 } dst_address;
110
111 u32 src_pitch;
112 u32 dst_pitch;
113 u32 x_count;
114 u32 y_count;
115
116 INSERT_PADDING_WORDS(0xBB);
117
118 Parameters dst_params;
119
120 INSERT_PADDING_WORDS(1);
121
122 Parameters src_params;
123
124 INSERT_PADDING_WORDS(0x13);
125 };
126 std::array<u32, NUM_REGS> reg_array;
127 };
128 } regs{};
129
130 MemoryManager& memory_manager;
131
132private:
133 /// Performs the copy from the source buffer to the destination buffer as configured in the
134 /// registers.
135 void HandleCopy();
136};
137
138#define ASSERT_REG_POSITION(field_name, position) \
139 static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4, \
140 "Field " #field_name " has invalid position")
141
142ASSERT_REG_POSITION(exec, 0xC0);
143ASSERT_REG_POSITION(src_address, 0x100);
144ASSERT_REG_POSITION(dst_address, 0x102);
145ASSERT_REG_POSITION(src_pitch, 0x104);
146ASSERT_REG_POSITION(dst_pitch, 0x105);
147ASSERT_REG_POSITION(x_count, 0x106);
148ASSERT_REG_POSITION(y_count, 0x107);
149ASSERT_REG_POSITION(dst_params, 0x1C3);
150ASSERT_REG_POSITION(src_params, 0x1CA);
151
152#undef ASSERT_REG_POSITION
153
154} // namespace Engines
155} // namespace Tegra
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 66351fe6e..e36483145 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -5,6 +5,7 @@
5#include "video_core/engines/fermi_2d.h" 5#include "video_core/engines/fermi_2d.h"
6#include "video_core/engines/maxwell_3d.h" 6#include "video_core/engines/maxwell_3d.h"
7#include "video_core/engines/maxwell_compute.h" 7#include "video_core/engines/maxwell_compute.h"
8#include "video_core/engines/maxwell_dma.h"
8#include "video_core/gpu.h" 9#include "video_core/gpu.h"
9 10
10namespace Tegra { 11namespace Tegra {
@@ -14,6 +15,7 @@ GPU::GPU() {
14 maxwell_3d = std::make_unique<Engines::Maxwell3D>(*memory_manager); 15 maxwell_3d = std::make_unique<Engines::Maxwell3D>(*memory_manager);
15 fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager); 16 fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager);
16 maxwell_compute = std::make_unique<Engines::MaxwellCompute>(); 17 maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
18 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(*memory_manager);
17} 19}
18 20
19GPU::~GPU() = default; 21GPU::~GPU() = default;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 5852b9619..7b4e9b842 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -63,6 +63,7 @@ namespace Engines {
63class Fermi2D; 63class Fermi2D;
64class Maxwell3D; 64class Maxwell3D;
65class MaxwellCompute; 65class MaxwellCompute;
66class MaxwellDMA;
66} // namespace Engines 67} // namespace Engines
67 68
68enum class EngineID { 69enum class EngineID {
@@ -103,6 +104,8 @@ private:
103 std::unique_ptr<Engines::Fermi2D> fermi_2d; 104 std::unique_ptr<Engines::Fermi2D> fermi_2d;
104 /// Compute engine 105 /// Compute engine
105 std::unique_ptr<Engines::MaxwellCompute> maxwell_compute; 106 std::unique_ptr<Engines::MaxwellCompute> maxwell_compute;
107 /// DMA engine
108 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
106}; 109};
107 110
108} // namespace Tegra 111} // namespace Tegra