summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/engines/engine_upload.cpp48
-rw-r--r--src/video_core/engines/engine_upload.h75
-rw-r--r--src/video_core/engines/fermi_2d.h6
-rw-r--r--src/video_core/engines/kepler_compute.cpp37
-rw-r--r--src/video_core/engines/kepler_compute.h175
-rw-r--r--src/video_core/engines/kepler_memory.cpp45
-rw-r--r--src/video_core/engines/kepler_memory.h66
-rw-r--r--src/video_core/engines/maxwell_3d.cpp16
-rw-r--r--src/video_core/engines/maxwell_3d.h25
-rw-r--r--src/video_core/engines/maxwell_dma.cpp83
-rw-r--r--src/video_core/engines/maxwell_dma.h43
-rw-r--r--src/video_core/gpu.cpp4
13 files changed, 483 insertions, 142 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6821f275d..1e010e4da 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -3,6 +3,8 @@ add_library(video_core STATIC
3 dma_pusher.h 3 dma_pusher.h
4 debug_utils/debug_utils.cpp 4 debug_utils/debug_utils.cpp
5 debug_utils/debug_utils.h 5 debug_utils/debug_utils.h
6 engines/engine_upload.cpp
7 engines/engine_upload.h
6 engines/fermi_2d.cpp 8 engines/fermi_2d.cpp
7 engines/fermi_2d.h 9 engines/fermi_2d.h
8 engines/kepler_compute.cpp 10 engines/kepler_compute.cpp
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
new file mode 100644
index 000000000..f8aa4ff55
--- /dev/null
+++ b/src/video_core/engines/engine_upload.cpp
@@ -0,0 +1,48 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "video_core/engines/engine_upload.h"
7#include "video_core/memory_manager.h"
8#include "video_core/textures/decoders.h"
9
10namespace Tegra::Engines::Upload {
11
12State::State(MemoryManager& memory_manager, Registers& regs)
13 : memory_manager(memory_manager), regs(regs) {}
14
15void State::ProcessExec(const bool is_linear) {
16 write_offset = 0;
17 copy_size = regs.line_length_in * regs.line_count;
18 inner_buffer.resize(copy_size);
19 this->is_linear = is_linear;
20}
21
22void State::ProcessData(const u32 data, const bool is_last_call) {
23 const u32 sub_copy_size = std::min(4U, copy_size - write_offset);
24 std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size);
25 write_offset += sub_copy_size;
26 if (!is_last_call) {
27 return;
28 }
29 const GPUVAddr address{regs.dest.Address()};
30 if (is_linear) {
31 memory_manager.WriteBlock(address, inner_buffer.data(), copy_size);
32 } else {
33 UNIMPLEMENTED_IF(regs.dest.z != 0);
34 UNIMPLEMENTED_IF(regs.dest.depth != 1);
35 UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
36 UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
37 const std::size_t dst_size = Tegra::Texture::CalculateSize(
38 true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
39 tmp_buffer.resize(dst_size);
40 memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
41 Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y,
42 regs.dest.BlockHeight(), copy_size, inner_buffer.data(),
43 tmp_buffer.data());
44 memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
45 }
46}
47
48} // namespace Tegra::Engines::Upload
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
new file mode 100644
index 000000000..9c6e0d21c
--- /dev/null
+++ b/src/video_core/engines/engine_upload.h
@@ -0,0 +1,75 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8#include <vector>
9#include "common/bit_field.h"
10#include "common/common_funcs.h"
11#include "common/common_types.h"
12
13namespace Tegra {
14class MemoryManager;
15}
16
17namespace Tegra::Engines::Upload {
18
19struct Registers {
20 u32 line_length_in;
21 u32 line_count;
22
23 struct {
24 u32 address_high;
25 u32 address_low;
26 u32 pitch;
27 union {
28 BitField<0, 4, u32> block_width;
29 BitField<4, 4, u32> block_height;
30 BitField<8, 4, u32> block_depth;
31 };
32 u32 width;
33 u32 height;
34 u32 depth;
35 u32 z;
36 u32 x;
37 u32 y;
38
39 GPUVAddr Address() const {
40 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
41 }
42
43 u32 BlockWidth() const {
44 return 1U << block_width.Value();
45 }
46
47 u32 BlockHeight() const {
48 return 1U << block_height.Value();
49 }
50
51 u32 BlockDepth() const {
52 return 1U << block_depth.Value();
53 }
54 } dest;
55};
56
57class State {
58public:
59 State(MemoryManager& memory_manager, Registers& regs);
60 ~State() = default;
61
62 void ProcessExec(const bool is_linear);
63 void ProcessData(const u32 data, const bool is_last_call);
64
65private:
66 u32 write_offset = 0;
67 u32 copy_size = 0;
68 std::vector<u8> inner_buffer;
69 std::vector<u8> tmp_buffer;
70 bool is_linear = false;
71 Registers& regs;
72 MemoryManager& memory_manager;
73};
74
75} // namespace Tegra::Engines::Upload
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 2e51b7f13..45f59a4d9 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -21,6 +21,12 @@ class RasterizerInterface;
21 21
22namespace Tegra::Engines { 22namespace Tegra::Engines {
23 23
24/**
25 * This Engine is known as G80_2D. Documentation can be found in:
26 * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml
27 * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
28 */
29
24#define FERMI2D_REG_INDEX(field_name) \ 30#define FERMI2D_REG_INDEX(field_name) \
25 (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32)) 31 (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32))
26 32
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index b1d950460..7404a8163 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -4,12 +4,21 @@
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/logging/log.h" 6#include "common/logging/log.h"
7#include "core/core.h"
7#include "video_core/engines/kepler_compute.h" 8#include "video_core/engines/kepler_compute.h"
9#include "video_core/engines/maxwell_3d.h"
8#include "video_core/memory_manager.h" 10#include "video_core/memory_manager.h"
11#include "video_core/rasterizer_interface.h"
12#include "video_core/renderer_base.h"
13#include "video_core/textures/decoders.h"
9 14
10namespace Tegra::Engines { 15namespace Tegra::Engines {
11 16
12KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {} 17KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
18 MemoryManager& memory_manager)
19 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{
20 memory_manager,
21 regs.upload} {}
13 22
14KeplerCompute::~KeplerCompute() = default; 23KeplerCompute::~KeplerCompute() = default;
15 24
@@ -20,14 +29,34 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
20 regs.reg_array[method_call.method] = method_call.argument; 29 regs.reg_array[method_call.method] = method_call.argument;
21 30
22 switch (method_call.method) { 31 switch (method_call.method) {
32 case KEPLER_COMPUTE_REG_INDEX(exec_upload): {
33 upload_state.ProcessExec(regs.exec_upload.linear != 0);
34 break;
35 }
36 case KEPLER_COMPUTE_REG_INDEX(data_upload): {
37 const bool is_last_call = method_call.IsLastCall();
38 upload_state.ProcessData(method_call.argument, is_last_call);
39 if (is_last_call) {
40 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
41 }
42 break;
43 }
23 case KEPLER_COMPUTE_REG_INDEX(launch): 44 case KEPLER_COMPUTE_REG_INDEX(launch):
24 // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA 45 ProcessLaunch();
25 // kernels)
26 UNREACHABLE_MSG("Compute shaders are not implemented");
27 break; 46 break;
28 default: 47 default:
29 break; 48 break;
30 } 49 }
31} 50}
32 51
52void KeplerCompute::ProcessLaunch() {
53
54 const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
55 memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
56 LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
57
58 const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start;
59 LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc);
60}
61
33} // namespace Tegra::Engines 62} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index fb6cdf432..5250b8d9b 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -6,22 +6,40 @@
6 6
7#include <array> 7#include <array>
8#include <cstddef> 8#include <cstddef>
9#include <vector>
10#include "common/bit_field.h"
9#include "common/common_funcs.h" 11#include "common/common_funcs.h"
10#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/engines/engine_upload.h"
11#include "video_core/gpu.h" 14#include "video_core/gpu.h"
12 15
16namespace Core {
17class System;
18}
19
13namespace Tegra { 20namespace Tegra {
14class MemoryManager; 21class MemoryManager;
15} 22}
16 23
24namespace VideoCore {
25class RasterizerInterface;
26}
27
17namespace Tegra::Engines { 28namespace Tegra::Engines {
18 29
30/**
31 * This Engine is known as GK104_Compute. Documentation can be found in:
32 * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_compute.xml
33 * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h
34 */
35
19#define KEPLER_COMPUTE_REG_INDEX(field_name) \ 36#define KEPLER_COMPUTE_REG_INDEX(field_name) \
20 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) 37 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
21 38
22class KeplerCompute final { 39class KeplerCompute final {
23public: 40public:
24 explicit KeplerCompute(MemoryManager& memory_manager); 41 explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
42 MemoryManager& memory_manager);
25 ~KeplerCompute(); 43 ~KeplerCompute();
26 44
27 static constexpr std::size_t NumConstBuffers = 8; 45 static constexpr std::size_t NumConstBuffers = 8;
@@ -31,30 +49,181 @@ public:
31 49
32 union { 50 union {
33 struct { 51 struct {
34 INSERT_PADDING_WORDS(0xAF); 52 INSERT_PADDING_WORDS(0x60);
53
54 Upload::Registers upload;
55
56 struct {
57 union {
58 BitField<0, 1, u32> linear;
59 };
60 } exec_upload;
61
62 u32 data_upload;
63
64 INSERT_PADDING_WORDS(0x3F);
65
66 struct {
67 u32 address;
68 GPUVAddr Address() const {
69 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8));
70 }
71 } launch_desc_loc;
72
73 INSERT_PADDING_WORDS(0x1);
35 74
36 u32 launch; 75 u32 launch;
37 76
38 INSERT_PADDING_WORDS(0xC48); 77 INSERT_PADDING_WORDS(0x4A7);
78
79 struct {
80 u32 address_high;
81 u32 address_low;
82 u32 limit;
83 GPUVAddr Address() const {
84 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
85 address_low);
86 }
87 } tsc;
88
89 INSERT_PADDING_WORDS(0x3);
90
91 struct {
92 u32 address_high;
93 u32 address_low;
94 u32 limit;
95 GPUVAddr Address() const {
96 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
97 address_low);
98 }
99 } tic;
100
101 INSERT_PADDING_WORDS(0x22);
102
103 struct {
104 u32 address_high;
105 u32 address_low;
106 GPUVAddr Address() const {
107 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
108 address_low);
109 }
110 } code_loc;
111
112 INSERT_PADDING_WORDS(0x3FE);
113
114 u32 texture_const_buffer_index;
115
116 INSERT_PADDING_WORDS(0x374);
39 }; 117 };
40 std::array<u32, NUM_REGS> reg_array; 118 std::array<u32, NUM_REGS> reg_array;
41 }; 119 };
42 } regs{}; 120 } regs{};
121
122 struct LaunchParams {
123 static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40;
124
125 INSERT_PADDING_WORDS(0x8);
126
127 u32 program_start;
128
129 INSERT_PADDING_WORDS(0x2);
130
131 BitField<30, 1, u32> linked_tsc;
132
133 BitField<0, 31, u32> grid_dim_x;
134 union {
135 BitField<0, 16, u32> grid_dim_y;
136 BitField<16, 16, u32> grid_dim_z;
137 };
138
139 INSERT_PADDING_WORDS(0x3);
140
141 BitField<0, 16, u32> shared_alloc;
142
143 BitField<0, 31, u32> block_dim_x;
144 union {
145 BitField<0, 16, u32> block_dim_y;
146 BitField<16, 16, u32> block_dim_z;
147 };
148
149 union {
150 BitField<0, 8, u32> const_buffer_enable_mask;
151 BitField<29, 2, u32> cache_layout;
152 } memory_config;
153
154 INSERT_PADDING_WORDS(0x8);
155
156 struct {
157 u32 address_low;
158 union {
159 BitField<0, 8, u32> address_high;
160 BitField<15, 17, u32> size;
161 };
162 GPUVAddr Address() const {
163 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) |
164 address_low);
165 }
166 } const_buffer_config[8];
167
168 union {
169 BitField<0, 20, u32> local_pos_alloc;
170 BitField<27, 5, u32> barrier_alloc;
171 };
172
173 union {
174 BitField<0, 20, u32> local_neg_alloc;
175 BitField<24, 5, u32> gpr_alloc;
176 };
177
178 INSERT_PADDING_WORDS(0x11);
179 } launch_description;
180
181 struct {
182 u32 write_offset = 0;
183 u32 copy_size = 0;
184 std::vector<u8> inner_buffer;
185 } state{};
186
43 static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), 187 static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32),
44 "KeplerCompute Regs has wrong size"); 188 "KeplerCompute Regs has wrong size");
45 189
190 static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32),
191 "KeplerCompute LaunchParams has wrong size");
192
46 /// Write the value to the register identified by method. 193 /// Write the value to the register identified by method.
47 void CallMethod(const GPU::MethodCall& method_call); 194 void CallMethod(const GPU::MethodCall& method_call);
48 195
49private: 196private:
197 Core::System& system;
198 VideoCore::RasterizerInterface& rasterizer;
50 MemoryManager& memory_manager; 199 MemoryManager& memory_manager;
200 Upload::State upload_state;
201
202 void ProcessLaunch();
51}; 203};
52 204
53#define ASSERT_REG_POSITION(field_name, position) \ 205#define ASSERT_REG_POSITION(field_name, position) \
54 static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \ 206 static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \
55 "Field " #field_name " has invalid position") 207 "Field " #field_name " has invalid position")
56 208
209#define ASSERT_LAUNCH_PARAM_POSITION(field_name, position) \
210 static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4, \
211 "Field " #field_name " has invalid position")
212
213ASSERT_REG_POSITION(upload, 0x60);
214ASSERT_REG_POSITION(exec_upload, 0x6C);
215ASSERT_REG_POSITION(data_upload, 0x6D);
57ASSERT_REG_POSITION(launch, 0xAF); 216ASSERT_REG_POSITION(launch, 0xAF);
217ASSERT_REG_POSITION(tsc, 0x557);
218ASSERT_REG_POSITION(tic, 0x55D);
219ASSERT_REG_POSITION(code_loc, 0x582);
220ASSERT_REG_POSITION(texture_const_buffer_index, 0x982);
221ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8);
222ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC);
223ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11);
224ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12);
225ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14);
226ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D);
58 227
59#undef ASSERT_REG_POSITION 228#undef ASSERT_REG_POSITION
60 229
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 7387886a3..0561f676c 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -14,9 +14,8 @@
14 14
15namespace Tegra::Engines { 15namespace Tegra::Engines {
16 16
17KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 17KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
18 MemoryManager& memory_manager) 18 : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {}
19 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
20 19
21KeplerMemory::~KeplerMemory() = default; 20KeplerMemory::~KeplerMemory() = default;
22 21
@@ -28,46 +27,18 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
28 27
29 switch (method_call.method) { 28 switch (method_call.method) {
30 case KEPLERMEMORY_REG_INDEX(exec): { 29 case KEPLERMEMORY_REG_INDEX(exec): {
31 ProcessExec(); 30 upload_state.ProcessExec(regs.exec.linear != 0);
32 break; 31 break;
33 } 32 }
34 case KEPLERMEMORY_REG_INDEX(data): { 33 case KEPLERMEMORY_REG_INDEX(data): {
35 ProcessData(method_call.argument, method_call.IsLastCall()); 34 const bool is_last_call = method_call.IsLastCall();
35 upload_state.ProcessData(method_call.argument, is_last_call);
36 if (is_last_call) {
37 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
38 }
36 break; 39 break;
37 } 40 }
38 } 41 }
39} 42}
40 43
41void KeplerMemory::ProcessExec() {
42 state.write_offset = 0;
43 state.copy_size = regs.line_length_in * regs.line_count;
44 state.inner_buffer.resize(state.copy_size);
45}
46
47void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
48 const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
49 std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size);
50 state.write_offset += sub_copy_size;
51 if (is_last_call) {
52 const GPUVAddr address{regs.dest.Address()};
53 if (regs.exec.linear != 0) {
54 memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size);
55 } else {
56 UNIMPLEMENTED_IF(regs.dest.z != 0);
57 UNIMPLEMENTED_IF(regs.dest.depth != 1);
58 UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
59 UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
60 const std::size_t dst_size = Tegra::Texture::CalculateSize(
61 true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
62 std::vector<u8> tmp_buffer(dst_size);
63 memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
64 Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
65 regs.dest.y, regs.dest.BlockHeight(), state.copy_size,
66 state.inner_buffer.data(), tmp_buffer.data());
67 memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
68 }
69 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
70 }
71}
72
73} // namespace Tegra::Engines 44} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 5f892ddad..f3bc675a9 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -10,6 +10,7 @@
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_funcs.h" 11#include "common/common_funcs.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/engines/engine_upload.h"
13#include "video_core/gpu.h" 14#include "video_core/gpu.h"
14 15
15namespace Core { 16namespace Core {
@@ -20,19 +21,20 @@ namespace Tegra {
20class MemoryManager; 21class MemoryManager;
21} 22}
22 23
23namespace VideoCore {
24class RasterizerInterface;
25}
26
27namespace Tegra::Engines { 24namespace Tegra::Engines {
28 25
26/**
27 * This Engine is known as P2MF. Documentation can be found in:
28 * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_p2mf.xml
29 * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h
30 */
31
29#define KEPLERMEMORY_REG_INDEX(field_name) \ 32#define KEPLERMEMORY_REG_INDEX(field_name) \
30 (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32)) 33 (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))
31 34
32class KeplerMemory final { 35class KeplerMemory final {
33public: 36public:
34 KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 37 KeplerMemory(Core::System& system, MemoryManager& memory_manager);
35 MemoryManager& memory_manager);
36 ~KeplerMemory(); 38 ~KeplerMemory();
37 39
38 /// Write the value to the register identified by method. 40 /// Write the value to the register identified by method.
@@ -45,42 +47,7 @@ public:
45 struct { 47 struct {
46 INSERT_PADDING_WORDS(0x60); 48 INSERT_PADDING_WORDS(0x60);
47 49
48 u32 line_length_in; 50 Upload::Registers upload;
49 u32 line_count;
50
51 struct {
52 u32 address_high;
53 u32 address_low;
54 u32 pitch;
55 union {
56 BitField<0, 4, u32> block_width;
57 BitField<4, 4, u32> block_height;
58 BitField<8, 4, u32> block_depth;
59 };
60 u32 width;
61 u32 height;
62 u32 depth;
63 u32 z;
64 u32 x;
65 u32 y;
66
67 GPUVAddr Address() const {
68 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
69 address_low);
70 }
71
72 u32 BlockWidth() const {
73 return 1U << block_width.Value();
74 }
75
76 u32 BlockHeight() const {
77 return 1U << block_height.Value();
78 }
79
80 u32 BlockDepth() const {
81 return 1U << block_depth.Value();
82 }
83 } dest;
84 51
85 struct { 52 struct {
86 union { 53 union {
@@ -96,28 +63,17 @@ public:
96 }; 63 };
97 } regs{}; 64 } regs{};
98 65
99 struct {
100 u32 write_offset = 0;
101 u32 copy_size = 0;
102 std::vector<u8> inner_buffer;
103 } state{};
104
105private: 66private:
106 Core::System& system; 67 Core::System& system;
107 VideoCore::RasterizerInterface& rasterizer;
108 MemoryManager& memory_manager; 68 MemoryManager& memory_manager;
109 69 Upload::State upload_state;
110 void ProcessExec();
111 void ProcessData(u32 data, bool is_last_call);
112}; 70};
113 71
114#define ASSERT_REG_POSITION(field_name, position) \ 72#define ASSERT_REG_POSITION(field_name, position) \
115 static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \ 73 static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \
116 "Field " #field_name " has invalid position") 74 "Field " #field_name " has invalid position")
117 75
118ASSERT_REG_POSITION(line_length_in, 0x60); 76ASSERT_REG_POSITION(upload, 0x60);
119ASSERT_REG_POSITION(line_count, 0x61);
120ASSERT_REG_POSITION(dest, 0x62);
121ASSERT_REG_POSITION(exec, 0x6C); 77ASSERT_REG_POSITION(exec, 0x6C);
122ASSERT_REG_POSITION(data, 0x6D); 78ASSERT_REG_POSITION(data, 0x6D);
123#undef ASSERT_REG_POSITION 79#undef ASSERT_REG_POSITION
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 9780417f2..d7b586db9 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -20,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00;
20 20
21Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 21Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
22 MemoryManager& memory_manager) 22 MemoryManager& memory_manager)
23 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{ 23 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
24 *this} { 24 macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
25 InitializeRegisterDefaults(); 25 InitializeRegisterDefaults();
26} 26}
27 27
@@ -253,6 +253,18 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
253 ProcessSyncPoint(); 253 ProcessSyncPoint();
254 break; 254 break;
255 } 255 }
256 case MAXWELL3D_REG_INDEX(exec_upload): {
257 upload_state.ProcessExec(regs.exec_upload.linear != 0);
258 break;
259 }
260 case MAXWELL3D_REG_INDEX(data_upload): {
261 const bool is_last_call = method_call.IsLastCall();
262 upload_state.ProcessData(method_call.argument, is_last_call);
263 if (is_last_call) {
264 dirty_flags.OnMemoryWrite();
265 }
266 break;
267 }
256 default: 268 default:
257 break; 269 break;
258 } 270 }
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 85d309d9b..4883b582a 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -14,6 +14,7 @@
14#include "common/common_funcs.h" 14#include "common/common_funcs.h"
15#include "common/common_types.h" 15#include "common/common_types.h"
16#include "common/math_util.h" 16#include "common/math_util.h"
17#include "video_core/engines/engine_upload.h"
17#include "video_core/gpu.h" 18#include "video_core/gpu.h"
18#include "video_core/macro_interpreter.h" 19#include "video_core/macro_interpreter.h"
19#include "video_core/textures/texture.h" 20#include "video_core/textures/texture.h"
@@ -32,6 +33,12 @@ class RasterizerInterface;
32 33
33namespace Tegra::Engines { 34namespace Tegra::Engines {
34 35
36/**
37 * This Engine is known as GF100_3D. Documentation can be found in:
38 * https://github.com/envytools/envytools/blob/master/rnndb/graph/gf100_3d.xml
39 * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
40 */
41
35#define MAXWELL3D_REG_INDEX(field_name) \ 42#define MAXWELL3D_REG_INDEX(field_name) \
36 (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) 43 (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
37 44
@@ -580,7 +587,18 @@ public:
580 u32 bind; 587 u32 bind;
581 } macros; 588 } macros;
582 589
583 INSERT_PADDING_WORDS(0x69); 590 INSERT_PADDING_WORDS(0x17);
591
592 Upload::Registers upload;
593 struct {
594 union {
595 BitField<0, 1, u32> linear;
596 };
597 } exec_upload;
598
599 u32 data_upload;
600
601 INSERT_PADDING_WORDS(0x44);
584 602
585 struct { 603 struct {
586 union { 604 union {
@@ -1176,6 +1194,8 @@ private:
1176 /// Interpreter for the macro codes uploaded to the GPU. 1194 /// Interpreter for the macro codes uploaded to the GPU.
1177 MacroInterpreter macro_interpreter; 1195 MacroInterpreter macro_interpreter;
1178 1196
1197 Upload::State upload_state;
1198
1179 /// Retrieves information about a specific TIC entry from the TIC buffer. 1199 /// Retrieves information about a specific TIC entry from the TIC buffer.
1180 Texture::TICEntry GetTICEntry(u32 tic_index) const; 1200 Texture::TICEntry GetTICEntry(u32 tic_index) const;
1181 1201
@@ -1219,6 +1239,9 @@ private:
1219 "Field " #field_name " has invalid position") 1239 "Field " #field_name " has invalid position")
1220 1240
1221ASSERT_REG_POSITION(macros, 0x45); 1241ASSERT_REG_POSITION(macros, 0x45);
1242ASSERT_REG_POSITION(upload, 0x60);
1243ASSERT_REG_POSITION(exec_upload, 0x6C);
1244ASSERT_REG_POSITION(data_upload, 0x6D);
1222ASSERT_REG_POSITION(sync_info, 0xB2); 1245ASSERT_REG_POSITION(sync_info, 0xB2);
1223ASSERT_REG_POSITION(tfb_enabled, 0x1D1); 1246ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
1224ASSERT_REG_POSITION(rt, 0x200); 1247ASSERT_REG_POSITION(rt, 0x200);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 2426d0067..3a5dfef0c 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -83,57 +83,66 @@ void MaxwellDMA::HandleCopy() {
83 83
84 ASSERT(regs.exec.enable_2d == 1); 84 ASSERT(regs.exec.enable_2d == 1);
85 85
86 const std::size_t copy_size = regs.x_count * regs.y_count; 86 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
87 ASSERT(regs.src_params.size_z == 1);
88 // If the input is tiled and the output is linear, deswizzle the input and copy it over.
89 const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
90 const std::size_t src_size = Texture::CalculateSize(
91 true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
92 regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
87 93
88 auto source_ptr{memory_manager.GetPointer(source)}; 94 const std::size_t dst_size = regs.dst_pitch * regs.y_count;
89 auto dst_ptr{memory_manager.GetPointer(dest)};
90 95
91 if (!source_ptr) { 96 if (read_buffer.size() < src_size) {
92 LOG_ERROR(HW_GPU, "source_ptr is invalid"); 97 read_buffer.resize(src_size);
93 return; 98 }
94 }
95 99
96 if (!dst_ptr) { 100 if (write_buffer.size() < dst_size) {
97 LOG_ERROR(HW_GPU, "dst_ptr is invalid"); 101 write_buffer.resize(dst_size);
98 return; 102 }
99 }
100 103
101 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { 104 memory_manager.ReadBlock(source, read_buffer.data(), src_size);
102 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated 105 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
103 // copying.
104 rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
105 106
106 // We have to invalidate the destination region to evict any outdated surfaces from the 107 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
107 // cache. We do this before actually writing the new data because the destination address 108 regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(),
108 // might contain a dirty surface that will have to be written back to memory. 109 write_buffer.data(), regs.src_params.BlockHeight(),
109 rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size); 110 regs.src_params.pos_x, regs.src_params.pos_y);
110 };
111 111
112 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { 112 memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
113 ASSERT(regs.src_params.size_z == 1); 113 } else {
114 // If the input is tiled and the output is linear, deswizzle the input and copy it over. 114 ASSERT(regs.dst_params.BlockDepth() == 1);
115 115
116 const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; 116 const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count;
117 117
118 FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y, 118 const std::size_t dst_size = Texture::CalculateSize(
119 copy_size * src_bytes_per_pixel); 119 true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
120 regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
120 121
121 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, 122 const std::size_t dst_layer_size = Texture::CalculateSize(
122 regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr, 123 true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
123 regs.src_params.BlockHeight(), regs.src_params.pos_x, 124 regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
124 regs.src_params.pos_y);
125 } else {
126 ASSERT(regs.dst_params.size_z == 1);
127 ASSERT(regs.src_pitch == regs.x_count);
128 125
129 const u32 src_bpp = regs.src_pitch / regs.x_count; 126 const std::size_t src_size = regs.src_pitch * regs.y_count;
130 127
131 FlushAndInvalidate(regs.src_pitch * regs.y_count, 128 if (read_buffer.size() < src_size) {
132 regs.dst_params.size_x * regs.dst_params.size_y * src_bpp); 129 read_buffer.resize(src_size);
130 }
131
132 if (write_buffer.size() < dst_size) {
133 write_buffer.resize(dst_size);
134 }
135
136 memory_manager.ReadBlock(source, read_buffer.data(), src_size);
137 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
133 138
134 // If the input is linear and the output is tiled, swizzle the input and copy it over. 139 // If the input is linear and the output is tiled, swizzle the input and copy it over.
135 Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, 140 Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
136 src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight()); 141 src_bytes_per_pixel,
142 write_buffer.data() + dst_layer_size * regs.dst_params.pos_z,
143 read_buffer.data(), regs.dst_params.BlockHeight());
144
145 memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
137 } 146 }
138} 147}
139 148
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index c6b649842..e5942f671 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -6,6 +6,7 @@
6 6
7#include <array> 7#include <array>
8#include <cstddef> 8#include <cstddef>
9#include <vector>
9#include "common/bit_field.h" 10#include "common/bit_field.h"
10#include "common/common_funcs.h" 11#include "common/common_funcs.h"
11#include "common/common_types.h" 12#include "common/common_types.h"
@@ -25,6 +26,11 @@ class RasterizerInterface;
25 26
26namespace Tegra::Engines { 27namespace Tegra::Engines {
27 28
29/**
30 * This Engine is known as GK104_Copy. Documentation can be found in:
31 * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
32 */
33
28class MaxwellDMA final { 34class MaxwellDMA final {
29public: 35public:
30 explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 36 explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
@@ -63,6 +69,16 @@ public:
63 69
64 static_assert(sizeof(Parameters) == 24, "Parameters has wrong size"); 70 static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
65 71
72 enum class ComponentMode : u32 {
73 Src0 = 0,
74 Src1 = 1,
75 Src2 = 2,
76 Src3 = 3,
77 Const0 = 4,
78 Const1 = 5,
79 Zero = 6,
80 };
81
66 enum class CopyMode : u32 { 82 enum class CopyMode : u32 {
67 None = 0, 83 None = 0,
68 Unk1 = 1, 84 Unk1 = 1,
@@ -128,7 +144,26 @@ public:
128 u32 x_count; 144 u32 x_count;
129 u32 y_count; 145 u32 y_count;
130 146
131 INSERT_PADDING_WORDS(0xBB); 147 INSERT_PADDING_WORDS(0xB8);
148
149 u32 const0;
150 u32 const1;
151 union {
152 BitField<0, 4, ComponentMode> component0;
153 BitField<4, 4, ComponentMode> component1;
154 BitField<8, 4, ComponentMode> component2;
155 BitField<12, 4, ComponentMode> component3;
156 BitField<16, 2, u32> component_size;
157 BitField<20, 3, u32> src_num_components;
158 BitField<24, 3, u32> dst_num_components;
159
160 u32 SrcBytePerPixel() const {
161 return src_num_components.Value() * component_size.Value();
162 }
163 u32 DstBytePerPixel() const {
164 return dst_num_components.Value() * component_size.Value();
165 }
166 } swizzle_config;
132 167
133 Parameters dst_params; 168 Parameters dst_params;
134 169
@@ -149,6 +184,9 @@ private:
149 184
150 MemoryManager& memory_manager; 185 MemoryManager& memory_manager;
151 186
187 std::vector<u8> read_buffer;
188 std::vector<u8> write_buffer;
189
152 /// Performs the copy from the source buffer to the destination buffer as configured in the 190 /// Performs the copy from the source buffer to the destination buffer as configured in the
153 /// registers. 191 /// registers.
154 void HandleCopy(); 192 void HandleCopy();
@@ -165,6 +203,9 @@ ASSERT_REG_POSITION(src_pitch, 0x104);
165ASSERT_REG_POSITION(dst_pitch, 0x105); 203ASSERT_REG_POSITION(dst_pitch, 0x105);
166ASSERT_REG_POSITION(x_count, 0x106); 204ASSERT_REG_POSITION(x_count, 0x106);
167ASSERT_REG_POSITION(y_count, 0x107); 205ASSERT_REG_POSITION(y_count, 0x107);
206ASSERT_REG_POSITION(const0, 0x1C0);
207ASSERT_REG_POSITION(const1, 0x1C1);
208ASSERT_REG_POSITION(swizzle_config, 0x1C2);
168ASSERT_REG_POSITION(dst_params, 0x1C3); 209ASSERT_REG_POSITION(dst_params, 0x1C3);
169ASSERT_REG_POSITION(src_params, 0x1CA); 210ASSERT_REG_POSITION(src_params, 0x1CA);
170 211
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 4461083ff..52706505b 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -35,9 +35,9 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren
35 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); 35 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
36 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); 36 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
37 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); 37 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
38 kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager); 38 kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
39 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager); 39 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
40 kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager); 40 kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
41} 41}
42 42
43GPU::~GPU() = default; 43GPU::~GPU() = default;