diff options
| author | 2019-04-22 19:05:43 -0400 | |
|---|---|---|
| committer | 2019-04-22 19:05:43 -0400 | |
| commit | e4ff140b99339589d87836f865fc437719adbbe9 (patch) | |
| tree | 2033d609fed3ce6cb2728076e629caaa7e06cce8 /src | |
| parent | Revamp Kepler Memory to use a subengine to manage uploads (diff) | |
| download | yuzu-e4ff140b99339589d87836f865fc437719adbbe9.tar.gz yuzu-e4ff140b99339589d87836f865fc437719adbbe9.tar.xz yuzu-e4ff140b99339589d87836f865fc437719adbbe9.zip | |
Introduce skeleton of the GPU Compute Engine.
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 37 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.h | 171 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 2 |
3 files changed, 202 insertions, 8 deletions
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index b1d950460..28f1f6a7d 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -4,12 +4,21 @@ | |||
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 7 | #include "core/core.h" | ||
| 7 | #include "video_core/engines/kepler_compute.h" | 8 | #include "video_core/engines/kepler_compute.h" |
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 8 | #include "video_core/memory_manager.h" | 10 | #include "video_core/memory_manager.h" |
| 11 | #include "video_core/rasterizer_interface.h" | ||
| 12 | #include "video_core/renderer_base.h" | ||
| 13 | #include "video_core/textures/decoders.h" | ||
| 9 | 14 | ||
| 10 | namespace Tegra::Engines { | 15 | namespace Tegra::Engines { |
| 11 | 16 | ||
| 12 | KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {} | 17 | KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |
| 18 | MemoryManager& memory_manager) | ||
| 19 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{ | ||
| 20 | memory_manager, | ||
| 21 | regs.upload} {} | ||
| 13 | 22 | ||
| 14 | KeplerCompute::~KeplerCompute() = default; | 23 | KeplerCompute::~KeplerCompute() = default; |
| 15 | 24 | ||
| @@ -20,14 +29,34 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | |||
| 20 | regs.reg_array[method_call.method] = method_call.argument; | 29 | regs.reg_array[method_call.method] = method_call.argument; |
| 21 | 30 | ||
| 22 | switch (method_call.method) { | 31 | switch (method_call.method) { |
| 32 | case KEPLER_COMPUTE_REG_INDEX(exec_upload): { | ||
| 33 | upload_state.ProcessExec(regs.exec_upload.linear != 0); | ||
| 34 | break; | ||
| 35 | } | ||
| 36 | case KEPLER_COMPUTE_REG_INDEX(data_upload): { | ||
| 37 | bool is_last_call = method_call.IsLastCall(); | ||
| 38 | upload_state.ProcessData(method_call.argument, is_last_call); | ||
| 39 | if (is_last_call) { | ||
| 40 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 41 | } | ||
| 42 | break; | ||
| 43 | } | ||
| 23 | case KEPLER_COMPUTE_REG_INDEX(launch): | 44 | case KEPLER_COMPUTE_REG_INDEX(launch): |
| 24 | // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA | 45 | ProcessLaunch(); |
| 25 | // kernels) | ||
| 26 | UNREACHABLE_MSG("Compute shaders are not implemented"); | ||
| 27 | break; | 46 | break; |
| 28 | default: | 47 | default: |
| 29 | break; | 48 | break; |
| 30 | } | 49 | } |
| 31 | } | 50 | } |
| 32 | 51 | ||
| 52 | void KeplerCompute::ProcessLaunch() { | ||
| 53 | |||
| 54 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); | ||
| 55 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, | ||
| 56 | LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); | ||
| 57 | |||
| 58 | const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start; | ||
| 59 | LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc); | ||
| 60 | } | ||
| 61 | |||
| 33 | } // namespace Tegra::Engines | 62 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index fb6cdf432..ab2781b4b 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h | |||
| @@ -6,14 +6,25 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstddef> | 8 | #include <cstddef> |
| 9 | #include <vector> | ||
| 10 | #include "common/bit_field.h" | ||
| 9 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| 10 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/engines/engine_upload.h" | ||
| 11 | #include "video_core/gpu.h" | 14 | #include "video_core/gpu.h" |
| 12 | 15 | ||
| 16 | namespace Core { | ||
| 17 | class System; | ||
| 18 | } | ||
| 19 | |||
| 13 | namespace Tegra { | 20 | namespace Tegra { |
| 14 | class MemoryManager; | 21 | class MemoryManager; |
| 15 | } | 22 | } |
| 16 | 23 | ||
| 24 | namespace VideoCore { | ||
| 25 | class RasterizerInterface; | ||
| 26 | } | ||
| 27 | |||
| 17 | namespace Tegra::Engines { | 28 | namespace Tegra::Engines { |
| 18 | 29 | ||
| 19 | #define KEPLER_COMPUTE_REG_INDEX(field_name) \ | 30 | #define KEPLER_COMPUTE_REG_INDEX(field_name) \ |
| @@ -21,7 +32,8 @@ namespace Tegra::Engines { | |||
| 21 | 32 | ||
| 22 | class KeplerCompute final { | 33 | class KeplerCompute final { |
| 23 | public: | 34 | public: |
| 24 | explicit KeplerCompute(MemoryManager& memory_manager); | 35 | explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |
| 36 | MemoryManager& memory_manager); | ||
| 25 | ~KeplerCompute(); | 37 | ~KeplerCompute(); |
| 26 | 38 | ||
| 27 | static constexpr std::size_t NumConstBuffers = 8; | 39 | static constexpr std::size_t NumConstBuffers = 8; |
| @@ -31,30 +43,183 @@ public: | |||
| 31 | 43 | ||
| 32 | union { | 44 | union { |
| 33 | struct { | 45 | struct { |
| 34 | INSERT_PADDING_WORDS(0xAF); | 46 | INSERT_PADDING_WORDS(0x60); |
| 47 | |||
| 48 | Upload::Data upload; | ||
| 49 | |||
| 50 | struct { | ||
| 51 | union { | ||
| 52 | BitField<0, 1, u32> linear; | ||
| 53 | }; | ||
| 54 | } exec_upload; | ||
| 55 | |||
| 56 | u32 data_upload; | ||
| 57 | |||
| 58 | INSERT_PADDING_WORDS(0x3F); | ||
| 59 | |||
| 60 | struct { | ||
| 61 | u32 address; | ||
| 62 | GPUVAddr Address() const { | ||
| 63 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8)); | ||
| 64 | } | ||
| 65 | } launch_desc_loc; | ||
| 66 | |||
| 67 | INSERT_PADDING_WORDS(0x1); | ||
| 35 | 68 | ||
| 36 | u32 launch; | 69 | u32 launch; |
| 37 | 70 | ||
| 38 | INSERT_PADDING_WORDS(0xC48); | 71 | INSERT_PADDING_WORDS(0x4A7); |
| 72 | |||
| 73 | struct { | ||
| 74 | u32 address_high; | ||
| 75 | u32 address_low; | ||
| 76 | u32 limit; | ||
| 77 | GPUVAddr Address() const { | ||
| 78 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 79 | address_low); | ||
| 80 | } | ||
| 81 | } tsc; | ||
| 82 | |||
| 83 | INSERT_PADDING_WORDS(0x3); | ||
| 84 | |||
| 85 | struct { | ||
| 86 | u32 address_high; | ||
| 87 | u32 address_low; | ||
| 88 | u32 limit; | ||
| 89 | GPUVAddr Address() const { | ||
| 90 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 91 | address_low); | ||
| 92 | } | ||
| 93 | } tic; | ||
| 94 | |||
| 95 | INSERT_PADDING_WORDS(0x22); | ||
| 96 | |||
| 97 | struct { | ||
| 98 | u32 address_high; | ||
| 99 | u32 address_low; | ||
| 100 | GPUVAddr Address() const { | ||
| 101 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 102 | address_low); | ||
| 103 | } | ||
| 104 | } code_loc; | ||
| 105 | |||
| 106 | INSERT_PADDING_WORDS(0x3FE); | ||
| 107 | |||
| 108 | u32 texture_const_buffer_index; | ||
| 109 | |||
| 110 | INSERT_PADDING_WORDS(0x374); | ||
| 39 | }; | 111 | }; |
| 40 | std::array<u32, NUM_REGS> reg_array; | 112 | std::array<u32, NUM_REGS> reg_array; |
| 41 | }; | 113 | }; |
| 42 | } regs{}; | 114 | } regs{}; |
| 115 | |||
| 116 | struct LaunchParams { | ||
| 117 | static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40; | ||
| 118 | |||
| 119 | INSERT_PADDING_WORDS(0x8); | ||
| 120 | |||
| 121 | u32 program_start; | ||
| 122 | |||
| 123 | INSERT_PADDING_WORDS(0x2); | ||
| 124 | |||
| 125 | BitField<30, 1, u32> linked_tsc; | ||
| 126 | |||
| 127 | BitField<0, 31, u32> grid_dim_x; | ||
| 128 | |||
| 129 | union { | ||
| 130 | BitField<0, 16, u32> grid_dim_y; | ||
| 131 | BitField<16, 16, u32> grid_dim_z; | ||
| 132 | }; | ||
| 133 | |||
| 134 | INSERT_PADDING_WORDS(0x3); | ||
| 135 | |||
| 136 | BitField<0, 16, u32> shared_alloc; | ||
| 137 | |||
| 138 | BitField<0, 31, u32> block_dim_x; | ||
| 139 | |||
| 140 | union { | ||
| 141 | BitField<0, 16, u32> block_dim_y; | ||
| 142 | BitField<16, 16, u32> block_dim_z; | ||
| 143 | }; | ||
| 144 | |||
| 145 | union { | ||
| 146 | BitField<0, 8, u32> const_buffer_enable_mask; | ||
| 147 | BitField<29, 2, u32> cache_layout; | ||
| 148 | } memory_config; | ||
| 149 | |||
| 150 | INSERT_PADDING_WORDS(0x8); | ||
| 151 | |||
| 152 | struct { | ||
| 153 | u32 address_low; | ||
| 154 | union { | ||
| 155 | BitField<0, 8, u32> address_high; | ||
| 156 | BitField<15, 17, u32> size; | ||
| 157 | }; | ||
| 158 | GPUVAddr Address() const { | ||
| 159 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) | | ||
| 160 | address_low); | ||
| 161 | } | ||
| 162 | } const_buffer_config[8]; | ||
| 163 | |||
| 164 | union { | ||
| 165 | BitField<0, 20, u32> local_pos_alloc; | ||
| 166 | BitField<27, 5, u32> barrier_alloc; | ||
| 167 | }; | ||
| 168 | |||
| 169 | union { | ||
| 170 | BitField<0, 20, u32> local_neg_alloc; | ||
| 171 | BitField<24, 5, u32> gpr_alloc; | ||
| 172 | }; | ||
| 173 | |||
| 174 | INSERT_PADDING_WORDS(0x11); | ||
| 175 | } launch_description; | ||
| 176 | |||
| 177 | struct { | ||
| 178 | u32 write_offset = 0; | ||
| 179 | u32 copy_size = 0; | ||
| 180 | std::vector<u8> inner_buffer; | ||
| 181 | } state{}; | ||
| 182 | |||
| 43 | static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), | 183 | static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), |
| 44 | "KeplerCompute Regs has wrong size"); | 184 | "KeplerCompute Regs has wrong size"); |
| 45 | 185 | ||
| 186 | static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32), | ||
| 187 | "KeplerCompute LaunchParams has wrong size"); | ||
| 188 | |||
| 46 | /// Write the value to the register identified by method. | 189 | /// Write the value to the register identified by method. |
| 47 | void CallMethod(const GPU::MethodCall& method_call); | 190 | void CallMethod(const GPU::MethodCall& method_call); |
| 48 | 191 | ||
| 49 | private: | 192 | private: |
| 193 | Core::System& system; | ||
| 194 | VideoCore::RasterizerInterface& rasterizer; | ||
| 50 | MemoryManager& memory_manager; | 195 | MemoryManager& memory_manager; |
| 196 | Upload::State upload_state; | ||
| 197 | |||
| 198 | void ProcessLaunch(); | ||
| 51 | }; | 199 | }; |
| 52 | 200 | ||
| 53 | #define ASSERT_REG_POSITION(field_name, position) \ | 201 | #define ASSERT_REG_POSITION(field_name, position) \ |
| 54 | static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \ | 202 | static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \ |
| 55 | "Field " #field_name " has invalid position") | 203 | "Field " #field_name " has invalid position") |
| 56 | 204 | ||
| 205 | #define ASSERT_LAUNCH_PARAM_POSITION(field_name, position) \ | ||
| 206 | static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4, \ | ||
| 207 | "Field " #field_name " has invalid position") | ||
| 208 | |||
| 209 | ASSERT_REG_POSITION(upload, 0x60); | ||
| 210 | ASSERT_REG_POSITION(exec_upload, 0x6C); | ||
| 211 | ASSERT_REG_POSITION(data_upload, 0x6D); | ||
| 57 | ASSERT_REG_POSITION(launch, 0xAF); | 212 | ASSERT_REG_POSITION(launch, 0xAF); |
| 213 | ASSERT_REG_POSITION(tsc, 0x557); | ||
| 214 | ASSERT_REG_POSITION(tic, 0x55D); | ||
| 215 | ASSERT_REG_POSITION(code_loc, 0x582); | ||
| 216 | ASSERT_REG_POSITION(texture_const_buffer_index, 0x982); | ||
| 217 | ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8); | ||
| 218 | ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC); | ||
| 219 | ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11); | ||
| 220 | ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12); | ||
| 221 | ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14); | ||
| 222 | ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D); | ||
| 58 | 223 | ||
| 59 | #undef ASSERT_REG_POSITION | 224 | #undef ASSERT_REG_POSITION |
| 60 | 225 | ||
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 9db6e4763..52706505b 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -35,7 +35,7 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren | |||
| 35 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); | 35 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); |
| 36 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); | 36 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); |
| 37 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); | 37 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); |
| 38 | kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager); | 38 | kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); |
| 39 | maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager); | 39 | maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager); |
| 40 | kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); | 40 | kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); |
| 41 | } | 41 | } |