diff options
| author | 2019-05-09 13:19:22 -0400 | |
|---|---|---|
| committer | 2019-05-09 13:19:22 -0400 | |
| commit | c27b81cb85d11b4c31f38c15a5e8e6d7c7211df2 (patch) | |
| tree | c91fe87f5a025effd3941d38653cda0920c68962 /src | |
| parent | Merge pull request #2440 from lioncash/dynarmic (diff) | |
| parent | Refactors and name corrections. (diff) | |
| download | yuzu-c27b81cb85d11b4c31f38c15a5e8e6d7c7211df2.tar.gz yuzu-c27b81cb85d11b4c31f38c15a5e8e6d7c7211df2.tar.xz yuzu-c27b81cb85d11b4c31f38c15a5e8e6d7c7211df2.zip | |
Merge pull request #2429 from FernandoS27/compute
Corrections and Implementation on GPU Engines
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/engines/engine_upload.cpp | 48 | ||||
| -rw-r--r-- | src/video_core/engines/engine_upload.h | 75 | ||||
| -rw-r--r-- | src/video_core/engines/fermi_2d.h | 6 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 37 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.h | 175 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 45 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.h | 66 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 25 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 83 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.h | 43 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 4 |
13 files changed, 483 insertions, 142 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6821f275d..1e010e4da 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -3,6 +3,8 @@ add_library(video_core STATIC | |||
| 3 | dma_pusher.h | 3 | dma_pusher.h |
| 4 | debug_utils/debug_utils.cpp | 4 | debug_utils/debug_utils.cpp |
| 5 | debug_utils/debug_utils.h | 5 | debug_utils/debug_utils.h |
| 6 | engines/engine_upload.cpp | ||
| 7 | engines/engine_upload.h | ||
| 6 | engines/fermi_2d.cpp | 8 | engines/fermi_2d.cpp |
| 7 | engines/fermi_2d.h | 9 | engines/fermi_2d.h |
| 8 | engines/kepler_compute.cpp | 10 | engines/kepler_compute.cpp |
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp new file mode 100644 index 000000000..f8aa4ff55 --- /dev/null +++ b/src/video_core/engines/engine_upload.cpp | |||
| @@ -0,0 +1,48 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "video_core/engines/engine_upload.h" | ||
| 7 | #include "video_core/memory_manager.h" | ||
| 8 | #include "video_core/textures/decoders.h" | ||
| 9 | |||
| 10 | namespace Tegra::Engines::Upload { | ||
| 11 | |||
| 12 | State::State(MemoryManager& memory_manager, Registers& regs) | ||
| 13 | : memory_manager(memory_manager), regs(regs) {} | ||
| 14 | |||
| 15 | void State::ProcessExec(const bool is_linear) { | ||
| 16 | write_offset = 0; | ||
| 17 | copy_size = regs.line_length_in * regs.line_count; | ||
| 18 | inner_buffer.resize(copy_size); | ||
| 19 | this->is_linear = is_linear; | ||
| 20 | } | ||
| 21 | |||
| 22 | void State::ProcessData(const u32 data, const bool is_last_call) { | ||
| 23 | const u32 sub_copy_size = std::min(4U, copy_size - write_offset); | ||
| 24 | std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size); | ||
| 25 | write_offset += sub_copy_size; | ||
| 26 | if (!is_last_call) { | ||
| 27 | return; | ||
| 28 | } | ||
| 29 | const GPUVAddr address{regs.dest.Address()}; | ||
| 30 | if (is_linear) { | ||
| 31 | memory_manager.WriteBlock(address, inner_buffer.data(), copy_size); | ||
| 32 | } else { | ||
| 33 | UNIMPLEMENTED_IF(regs.dest.z != 0); | ||
| 34 | UNIMPLEMENTED_IF(regs.dest.depth != 1); | ||
| 35 | UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); | ||
| 36 | UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); | ||
| 37 | const std::size_t dst_size = Tegra::Texture::CalculateSize( | ||
| 38 | true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); | ||
| 39 | tmp_buffer.resize(dst_size); | ||
| 40 | memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); | ||
| 41 | Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y, | ||
| 42 | regs.dest.BlockHeight(), copy_size, inner_buffer.data(), | ||
| 43 | tmp_buffer.data()); | ||
| 44 | memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); | ||
| 45 | } | ||
| 46 | } | ||
| 47 | |||
| 48 | } // namespace Tegra::Engines::Upload | ||
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h new file mode 100644 index 000000000..9c6e0d21c --- /dev/null +++ b/src/video_core/engines/engine_upload.h | |||
| @@ -0,0 +1,75 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <cstddef> | ||
| 8 | #include <vector> | ||
| 9 | #include "common/bit_field.h" | ||
| 10 | #include "common/common_funcs.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | |||
| 13 | namespace Tegra { | ||
| 14 | class MemoryManager; | ||
| 15 | } | ||
| 16 | |||
| 17 | namespace Tegra::Engines::Upload { | ||
| 18 | |||
| 19 | struct Registers { | ||
| 20 | u32 line_length_in; | ||
| 21 | u32 line_count; | ||
| 22 | |||
| 23 | struct { | ||
| 24 | u32 address_high; | ||
| 25 | u32 address_low; | ||
| 26 | u32 pitch; | ||
| 27 | union { | ||
| 28 | BitField<0, 4, u32> block_width; | ||
| 29 | BitField<4, 4, u32> block_height; | ||
| 30 | BitField<8, 4, u32> block_depth; | ||
| 31 | }; | ||
| 32 | u32 width; | ||
| 33 | u32 height; | ||
| 34 | u32 depth; | ||
| 35 | u32 z; | ||
| 36 | u32 x; | ||
| 37 | u32 y; | ||
| 38 | |||
| 39 | GPUVAddr Address() const { | ||
| 40 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); | ||
| 41 | } | ||
| 42 | |||
| 43 | u32 BlockWidth() const { | ||
| 44 | return 1U << block_width.Value(); | ||
| 45 | } | ||
| 46 | |||
| 47 | u32 BlockHeight() const { | ||
| 48 | return 1U << block_height.Value(); | ||
| 49 | } | ||
| 50 | |||
| 51 | u32 BlockDepth() const { | ||
| 52 | return 1U << block_depth.Value(); | ||
| 53 | } | ||
| 54 | } dest; | ||
| 55 | }; | ||
| 56 | |||
| 57 | class State { | ||
| 58 | public: | ||
| 59 | State(MemoryManager& memory_manager, Registers& regs); | ||
| 60 | ~State() = default; | ||
| 61 | |||
| 62 | void ProcessExec(const bool is_linear); | ||
| 63 | void ProcessData(const u32 data, const bool is_last_call); | ||
| 64 | |||
| 65 | private: | ||
| 66 | u32 write_offset = 0; | ||
| 67 | u32 copy_size = 0; | ||
| 68 | std::vector<u8> inner_buffer; | ||
| 69 | std::vector<u8> tmp_buffer; | ||
| 70 | bool is_linear = false; | ||
| 71 | Registers& regs; | ||
| 72 | MemoryManager& memory_manager; | ||
| 73 | }; | ||
| 74 | |||
| 75 | } // namespace Tegra::Engines::Upload | ||
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 2e51b7f13..45f59a4d9 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h | |||
| @@ -21,6 +21,12 @@ class RasterizerInterface; | |||
| 21 | 21 | ||
| 22 | namespace Tegra::Engines { | 22 | namespace Tegra::Engines { |
| 23 | 23 | ||
| 24 | /** | ||
| 25 | * This Engine is known as G80_2D. Documentation can be found in: | ||
| 26 | * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml | ||
| 27 | * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h | ||
| 28 | */ | ||
| 29 | |||
| 24 | #define FERMI2D_REG_INDEX(field_name) \ | 30 | #define FERMI2D_REG_INDEX(field_name) \ |
| 25 | (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32)) | 31 | (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32)) |
| 26 | 32 | ||
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index b1d950460..7404a8163 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -4,12 +4,21 @@ | |||
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 7 | #include "core/core.h" | ||
| 7 | #include "video_core/engines/kepler_compute.h" | 8 | #include "video_core/engines/kepler_compute.h" |
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 8 | #include "video_core/memory_manager.h" | 10 | #include "video_core/memory_manager.h" |
| 11 | #include "video_core/rasterizer_interface.h" | ||
| 12 | #include "video_core/renderer_base.h" | ||
| 13 | #include "video_core/textures/decoders.h" | ||
| 9 | 14 | ||
| 10 | namespace Tegra::Engines { | 15 | namespace Tegra::Engines { |
| 11 | 16 | ||
| 12 | KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {} | 17 | KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |
| 18 | MemoryManager& memory_manager) | ||
| 19 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{ | ||
| 20 | memory_manager, | ||
| 21 | regs.upload} {} | ||
| 13 | 22 | ||
| 14 | KeplerCompute::~KeplerCompute() = default; | 23 | KeplerCompute::~KeplerCompute() = default; |
| 15 | 24 | ||
| @@ -20,14 +29,34 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | |||
| 20 | regs.reg_array[method_call.method] = method_call.argument; | 29 | regs.reg_array[method_call.method] = method_call.argument; |
| 21 | 30 | ||
| 22 | switch (method_call.method) { | 31 | switch (method_call.method) { |
| 32 | case KEPLER_COMPUTE_REG_INDEX(exec_upload): { | ||
| 33 | upload_state.ProcessExec(regs.exec_upload.linear != 0); | ||
| 34 | break; | ||
| 35 | } | ||
| 36 | case KEPLER_COMPUTE_REG_INDEX(data_upload): { | ||
| 37 | const bool is_last_call = method_call.IsLastCall(); | ||
| 38 | upload_state.ProcessData(method_call.argument, is_last_call); | ||
| 39 | if (is_last_call) { | ||
| 40 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 41 | } | ||
| 42 | break; | ||
| 43 | } | ||
| 23 | case KEPLER_COMPUTE_REG_INDEX(launch): | 44 | case KEPLER_COMPUTE_REG_INDEX(launch): |
| 24 | // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA | 45 | ProcessLaunch(); |
| 25 | // kernels) | ||
| 26 | UNREACHABLE_MSG("Compute shaders are not implemented"); | ||
| 27 | break; | 46 | break; |
| 28 | default: | 47 | default: |
| 29 | break; | 48 | break; |
| 30 | } | 49 | } |
| 31 | } | 50 | } |
| 32 | 51 | ||
| 52 | void KeplerCompute::ProcessLaunch() { | ||
| 53 | |||
| 54 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); | ||
| 55 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, | ||
| 56 | LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); | ||
| 57 | |||
| 58 | const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start; | ||
| 59 | LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc); | ||
| 60 | } | ||
| 61 | |||
| 33 | } // namespace Tegra::Engines | 62 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index fb6cdf432..5250b8d9b 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h | |||
| @@ -6,22 +6,40 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstddef> | 8 | #include <cstddef> |
| 9 | #include <vector> | ||
| 10 | #include "common/bit_field.h" | ||
| 9 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| 10 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/engines/engine_upload.h" | ||
| 11 | #include "video_core/gpu.h" | 14 | #include "video_core/gpu.h" |
| 12 | 15 | ||
| 16 | namespace Core { | ||
| 17 | class System; | ||
| 18 | } | ||
| 19 | |||
| 13 | namespace Tegra { | 20 | namespace Tegra { |
| 14 | class MemoryManager; | 21 | class MemoryManager; |
| 15 | } | 22 | } |
| 16 | 23 | ||
| 24 | namespace VideoCore { | ||
| 25 | class RasterizerInterface; | ||
| 26 | } | ||
| 27 | |||
| 17 | namespace Tegra::Engines { | 28 | namespace Tegra::Engines { |
| 18 | 29 | ||
| 30 | /** | ||
| 31 | * This Engine is known as GK104_Compute. Documentation can be found in: | ||
| 32 | * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_compute.xml | ||
| 33 | * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h | ||
| 34 | */ | ||
| 35 | |||
| 19 | #define KEPLER_COMPUTE_REG_INDEX(field_name) \ | 36 | #define KEPLER_COMPUTE_REG_INDEX(field_name) \ |
| 20 | (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) | 37 | (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) |
| 21 | 38 | ||
| 22 | class KeplerCompute final { | 39 | class KeplerCompute final { |
| 23 | public: | 40 | public: |
| 24 | explicit KeplerCompute(MemoryManager& memory_manager); | 41 | explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |
| 42 | MemoryManager& memory_manager); | ||
| 25 | ~KeplerCompute(); | 43 | ~KeplerCompute(); |
| 26 | 44 | ||
| 27 | static constexpr std::size_t NumConstBuffers = 8; | 45 | static constexpr std::size_t NumConstBuffers = 8; |
| @@ -31,30 +49,181 @@ public: | |||
| 31 | 49 | ||
| 32 | union { | 50 | union { |
| 33 | struct { | 51 | struct { |
| 34 | INSERT_PADDING_WORDS(0xAF); | 52 | INSERT_PADDING_WORDS(0x60); |
| 53 | |||
| 54 | Upload::Registers upload; | ||
| 55 | |||
| 56 | struct { | ||
| 57 | union { | ||
| 58 | BitField<0, 1, u32> linear; | ||
| 59 | }; | ||
| 60 | } exec_upload; | ||
| 61 | |||
| 62 | u32 data_upload; | ||
| 63 | |||
| 64 | INSERT_PADDING_WORDS(0x3F); | ||
| 65 | |||
| 66 | struct { | ||
| 67 | u32 address; | ||
| 68 | GPUVAddr Address() const { | ||
| 69 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8)); | ||
| 70 | } | ||
| 71 | } launch_desc_loc; | ||
| 72 | |||
| 73 | INSERT_PADDING_WORDS(0x1); | ||
| 35 | 74 | ||
| 36 | u32 launch; | 75 | u32 launch; |
| 37 | 76 | ||
| 38 | INSERT_PADDING_WORDS(0xC48); | 77 | INSERT_PADDING_WORDS(0x4A7); |
| 78 | |||
| 79 | struct { | ||
| 80 | u32 address_high; | ||
| 81 | u32 address_low; | ||
| 82 | u32 limit; | ||
| 83 | GPUVAddr Address() const { | ||
| 84 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 85 | address_low); | ||
| 86 | } | ||
| 87 | } tsc; | ||
| 88 | |||
| 89 | INSERT_PADDING_WORDS(0x3); | ||
| 90 | |||
| 91 | struct { | ||
| 92 | u32 address_high; | ||
| 93 | u32 address_low; | ||
| 94 | u32 limit; | ||
| 95 | GPUVAddr Address() const { | ||
| 96 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 97 | address_low); | ||
| 98 | } | ||
| 99 | } tic; | ||
| 100 | |||
| 101 | INSERT_PADDING_WORDS(0x22); | ||
| 102 | |||
| 103 | struct { | ||
| 104 | u32 address_high; | ||
| 105 | u32 address_low; | ||
| 106 | GPUVAddr Address() const { | ||
| 107 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 108 | address_low); | ||
| 109 | } | ||
| 110 | } code_loc; | ||
| 111 | |||
| 112 | INSERT_PADDING_WORDS(0x3FE); | ||
| 113 | |||
| 114 | u32 texture_const_buffer_index; | ||
| 115 | |||
| 116 | INSERT_PADDING_WORDS(0x374); | ||
| 39 | }; | 117 | }; |
| 40 | std::array<u32, NUM_REGS> reg_array; | 118 | std::array<u32, NUM_REGS> reg_array; |
| 41 | }; | 119 | }; |
| 42 | } regs{}; | 120 | } regs{}; |
| 121 | |||
| 122 | struct LaunchParams { | ||
| 123 | static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40; | ||
| 124 | |||
| 125 | INSERT_PADDING_WORDS(0x8); | ||
| 126 | |||
| 127 | u32 program_start; | ||
| 128 | |||
| 129 | INSERT_PADDING_WORDS(0x2); | ||
| 130 | |||
| 131 | BitField<30, 1, u32> linked_tsc; | ||
| 132 | |||
| 133 | BitField<0, 31, u32> grid_dim_x; | ||
| 134 | union { | ||
| 135 | BitField<0, 16, u32> grid_dim_y; | ||
| 136 | BitField<16, 16, u32> grid_dim_z; | ||
| 137 | }; | ||
| 138 | |||
| 139 | INSERT_PADDING_WORDS(0x3); | ||
| 140 | |||
| 141 | BitField<0, 16, u32> shared_alloc; | ||
| 142 | |||
| 143 | BitField<0, 31, u32> block_dim_x; | ||
| 144 | union { | ||
| 145 | BitField<0, 16, u32> block_dim_y; | ||
| 146 | BitField<16, 16, u32> block_dim_z; | ||
| 147 | }; | ||
| 148 | |||
| 149 | union { | ||
| 150 | BitField<0, 8, u32> const_buffer_enable_mask; | ||
| 151 | BitField<29, 2, u32> cache_layout; | ||
| 152 | } memory_config; | ||
| 153 | |||
| 154 | INSERT_PADDING_WORDS(0x8); | ||
| 155 | |||
| 156 | struct { | ||
| 157 | u32 address_low; | ||
| 158 | union { | ||
| 159 | BitField<0, 8, u32> address_high; | ||
| 160 | BitField<15, 17, u32> size; | ||
| 161 | }; | ||
| 162 | GPUVAddr Address() const { | ||
| 163 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) | | ||
| 164 | address_low); | ||
| 165 | } | ||
| 166 | } const_buffer_config[8]; | ||
| 167 | |||
| 168 | union { | ||
| 169 | BitField<0, 20, u32> local_pos_alloc; | ||
| 170 | BitField<27, 5, u32> barrier_alloc; | ||
| 171 | }; | ||
| 172 | |||
| 173 | union { | ||
| 174 | BitField<0, 20, u32> local_neg_alloc; | ||
| 175 | BitField<24, 5, u32> gpr_alloc; | ||
| 176 | }; | ||
| 177 | |||
| 178 | INSERT_PADDING_WORDS(0x11); | ||
| 179 | } launch_description; | ||
| 180 | |||
| 181 | struct { | ||
| 182 | u32 write_offset = 0; | ||
| 183 | u32 copy_size = 0; | ||
| 184 | std::vector<u8> inner_buffer; | ||
| 185 | } state{}; | ||
| 186 | |||
| 43 | static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), | 187 | static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), |
| 44 | "KeplerCompute Regs has wrong size"); | 188 | "KeplerCompute Regs has wrong size"); |
| 45 | 189 | ||
| 190 | static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32), | ||
| 191 | "KeplerCompute LaunchParams has wrong size"); | ||
| 192 | |||
| 46 | /// Write the value to the register identified by method. | 193 | /// Write the value to the register identified by method. |
| 47 | void CallMethod(const GPU::MethodCall& method_call); | 194 | void CallMethod(const GPU::MethodCall& method_call); |
| 48 | 195 | ||
| 49 | private: | 196 | private: |
| 197 | Core::System& system; | ||
| 198 | VideoCore::RasterizerInterface& rasterizer; | ||
| 50 | MemoryManager& memory_manager; | 199 | MemoryManager& memory_manager; |
| 200 | Upload::State upload_state; | ||
| 201 | |||
| 202 | void ProcessLaunch(); | ||
| 51 | }; | 203 | }; |
| 52 | 204 | ||
| 53 | #define ASSERT_REG_POSITION(field_name, position) \ | 205 | #define ASSERT_REG_POSITION(field_name, position) \ |
| 54 | static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \ | 206 | static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \ |
| 55 | "Field " #field_name " has invalid position") | 207 | "Field " #field_name " has invalid position") |
| 56 | 208 | ||
| 209 | #define ASSERT_LAUNCH_PARAM_POSITION(field_name, position) \ | ||
| 210 | static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4, \ | ||
| 211 | "Field " #field_name " has invalid position") | ||
| 212 | |||
| 213 | ASSERT_REG_POSITION(upload, 0x60); | ||
| 214 | ASSERT_REG_POSITION(exec_upload, 0x6C); | ||
| 215 | ASSERT_REG_POSITION(data_upload, 0x6D); | ||
| 57 | ASSERT_REG_POSITION(launch, 0xAF); | 216 | ASSERT_REG_POSITION(launch, 0xAF); |
| 217 | ASSERT_REG_POSITION(tsc, 0x557); | ||
| 218 | ASSERT_REG_POSITION(tic, 0x55D); | ||
| 219 | ASSERT_REG_POSITION(code_loc, 0x582); | ||
| 220 | ASSERT_REG_POSITION(texture_const_buffer_index, 0x982); | ||
| 221 | ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8); | ||
| 222 | ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC); | ||
| 223 | ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11); | ||
| 224 | ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12); | ||
| 225 | ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14); | ||
| 226 | ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D); | ||
| 58 | 227 | ||
| 59 | #undef ASSERT_REG_POSITION | 228 | #undef ASSERT_REG_POSITION |
| 60 | 229 | ||
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 7387886a3..0561f676c 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -14,9 +14,8 @@ | |||
| 14 | 14 | ||
| 15 | namespace Tegra::Engines { | 15 | namespace Tegra::Engines { |
| 16 | 16 | ||
| 17 | KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 17 | KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager) |
| 18 | MemoryManager& memory_manager) | 18 | : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {} |
| 19 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {} | ||
| 20 | 19 | ||
| 21 | KeplerMemory::~KeplerMemory() = default; | 20 | KeplerMemory::~KeplerMemory() = default; |
| 22 | 21 | ||
| @@ -28,46 +27,18 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { | |||
| 28 | 27 | ||
| 29 | switch (method_call.method) { | 28 | switch (method_call.method) { |
| 30 | case KEPLERMEMORY_REG_INDEX(exec): { | 29 | case KEPLERMEMORY_REG_INDEX(exec): { |
| 31 | ProcessExec(); | 30 | upload_state.ProcessExec(regs.exec.linear != 0); |
| 32 | break; | 31 | break; |
| 33 | } | 32 | } |
| 34 | case KEPLERMEMORY_REG_INDEX(data): { | 33 | case KEPLERMEMORY_REG_INDEX(data): { |
| 35 | ProcessData(method_call.argument, method_call.IsLastCall()); | 34 | const bool is_last_call = method_call.IsLastCall(); |
| 35 | upload_state.ProcessData(method_call.argument, is_last_call); | ||
| 36 | if (is_last_call) { | ||
| 37 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 38 | } | ||
| 36 | break; | 39 | break; |
| 37 | } | 40 | } |
| 38 | } | 41 | } |
| 39 | } | 42 | } |
| 40 | 43 | ||
| 41 | void KeplerMemory::ProcessExec() { | ||
| 42 | state.write_offset = 0; | ||
| 43 | state.copy_size = regs.line_length_in * regs.line_count; | ||
| 44 | state.inner_buffer.resize(state.copy_size); | ||
| 45 | } | ||
| 46 | |||
| 47 | void KeplerMemory::ProcessData(u32 data, bool is_last_call) { | ||
| 48 | const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset); | ||
| 49 | std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size); | ||
| 50 | state.write_offset += sub_copy_size; | ||
| 51 | if (is_last_call) { | ||
| 52 | const GPUVAddr address{regs.dest.Address()}; | ||
| 53 | if (regs.exec.linear != 0) { | ||
| 54 | memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size); | ||
| 55 | } else { | ||
| 56 | UNIMPLEMENTED_IF(regs.dest.z != 0); | ||
| 57 | UNIMPLEMENTED_IF(regs.dest.depth != 1); | ||
| 58 | UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); | ||
| 59 | UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); | ||
| 60 | const std::size_t dst_size = Tegra::Texture::CalculateSize( | ||
| 61 | true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); | ||
| 62 | std::vector<u8> tmp_buffer(dst_size); | ||
| 63 | memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); | ||
| 64 | Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, | ||
| 65 | regs.dest.y, regs.dest.BlockHeight(), state.copy_size, | ||
| 66 | state.inner_buffer.data(), tmp_buffer.data()); | ||
| 67 | memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); | ||
| 68 | } | ||
| 69 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 70 | } | ||
| 71 | } | ||
| 72 | |||
| 73 | } // namespace Tegra::Engines | 44 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index 5f892ddad..f3bc675a9 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 11 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/engines/engine_upload.h" | ||
| 13 | #include "video_core/gpu.h" | 14 | #include "video_core/gpu.h" |
| 14 | 15 | ||
| 15 | namespace Core { | 16 | namespace Core { |
| @@ -20,19 +21,20 @@ namespace Tegra { | |||
| 20 | class MemoryManager; | 21 | class MemoryManager; |
| 21 | } | 22 | } |
| 22 | 23 | ||
| 23 | namespace VideoCore { | ||
| 24 | class RasterizerInterface; | ||
| 25 | } | ||
| 26 | |||
| 27 | namespace Tegra::Engines { | 24 | namespace Tegra::Engines { |
| 28 | 25 | ||
| 26 | /** | ||
| 27 | * This Engine is known as P2MF. Documentation can be found in: | ||
| 28 | * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_p2mf.xml | ||
| 29 | * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h | ||
| 30 | */ | ||
| 31 | |||
| 29 | #define KEPLERMEMORY_REG_INDEX(field_name) \ | 32 | #define KEPLERMEMORY_REG_INDEX(field_name) \ |
| 30 | (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32)) | 33 | (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32)) |
| 31 | 34 | ||
| 32 | class KeplerMemory final { | 35 | class KeplerMemory final { |
| 33 | public: | 36 | public: |
| 34 | KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 37 | KeplerMemory(Core::System& system, MemoryManager& memory_manager); |
| 35 | MemoryManager& memory_manager); | ||
| 36 | ~KeplerMemory(); | 38 | ~KeplerMemory(); |
| 37 | 39 | ||
| 38 | /// Write the value to the register identified by method. | 40 | /// Write the value to the register identified by method. |
| @@ -45,42 +47,7 @@ public: | |||
| 45 | struct { | 47 | struct { |
| 46 | INSERT_PADDING_WORDS(0x60); | 48 | INSERT_PADDING_WORDS(0x60); |
| 47 | 49 | ||
| 48 | u32 line_length_in; | 50 | Upload::Registers upload; |
| 49 | u32 line_count; | ||
| 50 | |||
| 51 | struct { | ||
| 52 | u32 address_high; | ||
| 53 | u32 address_low; | ||
| 54 | u32 pitch; | ||
| 55 | union { | ||
| 56 | BitField<0, 4, u32> block_width; | ||
| 57 | BitField<4, 4, u32> block_height; | ||
| 58 | BitField<8, 4, u32> block_depth; | ||
| 59 | }; | ||
| 60 | u32 width; | ||
| 61 | u32 height; | ||
| 62 | u32 depth; | ||
| 63 | u32 z; | ||
| 64 | u32 x; | ||
| 65 | u32 y; | ||
| 66 | |||
| 67 | GPUVAddr Address() const { | ||
| 68 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 69 | address_low); | ||
| 70 | } | ||
| 71 | |||
| 72 | u32 BlockWidth() const { | ||
| 73 | return 1U << block_width.Value(); | ||
| 74 | } | ||
| 75 | |||
| 76 | u32 BlockHeight() const { | ||
| 77 | return 1U << block_height.Value(); | ||
| 78 | } | ||
| 79 | |||
| 80 | u32 BlockDepth() const { | ||
| 81 | return 1U << block_depth.Value(); | ||
| 82 | } | ||
| 83 | } dest; | ||
| 84 | 51 | ||
| 85 | struct { | 52 | struct { |
| 86 | union { | 53 | union { |
| @@ -96,28 +63,17 @@ public: | |||
| 96 | }; | 63 | }; |
| 97 | } regs{}; | 64 | } regs{}; |
| 98 | 65 | ||
| 99 | struct { | ||
| 100 | u32 write_offset = 0; | ||
| 101 | u32 copy_size = 0; | ||
| 102 | std::vector<u8> inner_buffer; | ||
| 103 | } state{}; | ||
| 104 | |||
| 105 | private: | 66 | private: |
| 106 | Core::System& system; | 67 | Core::System& system; |
| 107 | VideoCore::RasterizerInterface& rasterizer; | ||
| 108 | MemoryManager& memory_manager; | 68 | MemoryManager& memory_manager; |
| 109 | 69 | Upload::State upload_state; | |
| 110 | void ProcessExec(); | ||
| 111 | void ProcessData(u32 data, bool is_last_call); | ||
| 112 | }; | 70 | }; |
| 113 | 71 | ||
| 114 | #define ASSERT_REG_POSITION(field_name, position) \ | 72 | #define ASSERT_REG_POSITION(field_name, position) \ |
| 115 | static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \ | 73 | static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \ |
| 116 | "Field " #field_name " has invalid position") | 74 | "Field " #field_name " has invalid position") |
| 117 | 75 | ||
| 118 | ASSERT_REG_POSITION(line_length_in, 0x60); | 76 | ASSERT_REG_POSITION(upload, 0x60); |
| 119 | ASSERT_REG_POSITION(line_count, 0x61); | ||
| 120 | ASSERT_REG_POSITION(dest, 0x62); | ||
| 121 | ASSERT_REG_POSITION(exec, 0x6C); | 77 | ASSERT_REG_POSITION(exec, 0x6C); |
| 122 | ASSERT_REG_POSITION(data, 0x6D); | 78 | ASSERT_REG_POSITION(data, 0x6D); |
| 123 | #undef ASSERT_REG_POSITION | 79 | #undef ASSERT_REG_POSITION |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 9780417f2..d7b586db9 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -20,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00; | |||
| 20 | 20 | ||
| 21 | Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 21 | Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |
| 22 | MemoryManager& memory_manager) | 22 | MemoryManager& memory_manager) |
| 23 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{ | 23 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, |
| 24 | *this} { | 24 | macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { |
| 25 | InitializeRegisterDefaults(); | 25 | InitializeRegisterDefaults(); |
| 26 | } | 26 | } |
| 27 | 27 | ||
| @@ -253,6 +253,18 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 253 | ProcessSyncPoint(); | 253 | ProcessSyncPoint(); |
| 254 | break; | 254 | break; |
| 255 | } | 255 | } |
| 256 | case MAXWELL3D_REG_INDEX(exec_upload): { | ||
| 257 | upload_state.ProcessExec(regs.exec_upload.linear != 0); | ||
| 258 | break; | ||
| 259 | } | ||
| 260 | case MAXWELL3D_REG_INDEX(data_upload): { | ||
| 261 | const bool is_last_call = method_call.IsLastCall(); | ||
| 262 | upload_state.ProcessData(method_call.argument, is_last_call); | ||
| 263 | if (is_last_call) { | ||
| 264 | dirty_flags.OnMemoryWrite(); | ||
| 265 | } | ||
| 266 | break; | ||
| 267 | } | ||
| 256 | default: | 268 | default: |
| 257 | break; | 269 | break; |
| 258 | } | 270 | } |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 85d309d9b..4883b582a 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include "common/common_funcs.h" | 14 | #include "common/common_funcs.h" |
| 15 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 16 | #include "common/math_util.h" | 16 | #include "common/math_util.h" |
| 17 | #include "video_core/engines/engine_upload.h" | ||
| 17 | #include "video_core/gpu.h" | 18 | #include "video_core/gpu.h" |
| 18 | #include "video_core/macro_interpreter.h" | 19 | #include "video_core/macro_interpreter.h" |
| 19 | #include "video_core/textures/texture.h" | 20 | #include "video_core/textures/texture.h" |
| @@ -32,6 +33,12 @@ class RasterizerInterface; | |||
| 32 | 33 | ||
| 33 | namespace Tegra::Engines { | 34 | namespace Tegra::Engines { |
| 34 | 35 | ||
| 36 | /** | ||
| 37 | * This Engine is known as GF100_3D. Documentation can be found in: | ||
| 38 | * https://github.com/envytools/envytools/blob/master/rnndb/graph/gf100_3d.xml | ||
| 39 | * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h | ||
| 40 | */ | ||
| 41 | |||
| 35 | #define MAXWELL3D_REG_INDEX(field_name) \ | 42 | #define MAXWELL3D_REG_INDEX(field_name) \ |
| 36 | (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) | 43 | (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) |
| 37 | 44 | ||
| @@ -580,7 +587,18 @@ public: | |||
| 580 | u32 bind; | 587 | u32 bind; |
| 581 | } macros; | 588 | } macros; |
| 582 | 589 | ||
| 583 | INSERT_PADDING_WORDS(0x69); | 590 | INSERT_PADDING_WORDS(0x17); |
| 591 | |||
| 592 | Upload::Registers upload; | ||
| 593 | struct { | ||
| 594 | union { | ||
| 595 | BitField<0, 1, u32> linear; | ||
| 596 | }; | ||
| 597 | } exec_upload; | ||
| 598 | |||
| 599 | u32 data_upload; | ||
| 600 | |||
| 601 | INSERT_PADDING_WORDS(0x44); | ||
| 584 | 602 | ||
| 585 | struct { | 603 | struct { |
| 586 | union { | 604 | union { |
| @@ -1176,6 +1194,8 @@ private: | |||
| 1176 | /// Interpreter for the macro codes uploaded to the GPU. | 1194 | /// Interpreter for the macro codes uploaded to the GPU. |
| 1177 | MacroInterpreter macro_interpreter; | 1195 | MacroInterpreter macro_interpreter; |
| 1178 | 1196 | ||
| 1197 | Upload::State upload_state; | ||
| 1198 | |||
| 1179 | /// Retrieves information about a specific TIC entry from the TIC buffer. | 1199 | /// Retrieves information about a specific TIC entry from the TIC buffer. |
| 1180 | Texture::TICEntry GetTICEntry(u32 tic_index) const; | 1200 | Texture::TICEntry GetTICEntry(u32 tic_index) const; |
| 1181 | 1201 | ||
| @@ -1219,6 +1239,9 @@ private: | |||
| 1219 | "Field " #field_name " has invalid position") | 1239 | "Field " #field_name " has invalid position") |
| 1220 | 1240 | ||
| 1221 | ASSERT_REG_POSITION(macros, 0x45); | 1241 | ASSERT_REG_POSITION(macros, 0x45); |
| 1242 | ASSERT_REG_POSITION(upload, 0x60); | ||
| 1243 | ASSERT_REG_POSITION(exec_upload, 0x6C); | ||
| 1244 | ASSERT_REG_POSITION(data_upload, 0x6D); | ||
| 1222 | ASSERT_REG_POSITION(sync_info, 0xB2); | 1245 | ASSERT_REG_POSITION(sync_info, 0xB2); |
| 1223 | ASSERT_REG_POSITION(tfb_enabled, 0x1D1); | 1246 | ASSERT_REG_POSITION(tfb_enabled, 0x1D1); |
| 1224 | ASSERT_REG_POSITION(rt, 0x200); | 1247 | ASSERT_REG_POSITION(rt, 0x200); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 2426d0067..3a5dfef0c 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -83,57 +83,66 @@ void MaxwellDMA::HandleCopy() { | |||
| 83 | 83 | ||
| 84 | ASSERT(regs.exec.enable_2d == 1); | 84 | ASSERT(regs.exec.enable_2d == 1); |
| 85 | 85 | ||
| 86 | const std::size_t copy_size = regs.x_count * regs.y_count; | 86 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { |
| 87 | ASSERT(regs.src_params.size_z == 1); | ||
| 88 | // If the input is tiled and the output is linear, deswizzle the input and copy it over. | ||
| 89 | const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; | ||
| 90 | const std::size_t src_size = Texture::CalculateSize( | ||
| 91 | true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, | ||
| 92 | regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); | ||
| 87 | 93 | ||
| 88 | auto source_ptr{memory_manager.GetPointer(source)}; | 94 | const std::size_t dst_size = regs.dst_pitch * regs.y_count; |
| 89 | auto dst_ptr{memory_manager.GetPointer(dest)}; | ||
| 90 | 95 | ||
| 91 | if (!source_ptr) { | 96 | if (read_buffer.size() < src_size) { |
| 92 | LOG_ERROR(HW_GPU, "source_ptr is invalid"); | 97 | read_buffer.resize(src_size); |
| 93 | return; | 98 | } |
| 94 | } | ||
| 95 | 99 | ||
| 96 | if (!dst_ptr) { | 100 | if (write_buffer.size() < dst_size) { |
| 97 | LOG_ERROR(HW_GPU, "dst_ptr is invalid"); | 101 | write_buffer.resize(dst_size); |
| 98 | return; | 102 | } |
| 99 | } | ||
| 100 | 103 | ||
| 101 | const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { | 104 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); |
| 102 | // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated | 105 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); |
| 103 | // copying. | ||
| 104 | rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size); | ||
| 105 | 106 | ||
| 106 | // We have to invalidate the destination region to evict any outdated surfaces from the | 107 | Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, |
| 107 | // cache. We do this before actually writing the new data because the destination address | 108 | regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(), |
| 108 | // might contain a dirty surface that will have to be written back to memory. | 109 | write_buffer.data(), regs.src_params.BlockHeight(), |
| 109 | rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size); | 110 | regs.src_params.pos_x, regs.src_params.pos_y); |
| 110 | }; | ||
| 111 | 111 | ||
| 112 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { | 112 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); |
| 113 | ASSERT(regs.src_params.size_z == 1); | 113 | } else { |
| 114 | // If the input is tiled and the output is linear, deswizzle the input and copy it over. | 114 | ASSERT(regs.dst_params.BlockDepth() == 1); |
| 115 | 115 | ||
| 116 | const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; | 116 | const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; |
| 117 | 117 | ||
| 118 | FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y, | 118 | const std::size_t dst_size = Texture::CalculateSize( |
| 119 | copy_size * src_bytes_per_pixel); | 119 | true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, |
| 120 | regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); | ||
| 120 | 121 | ||
| 121 | Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, | 122 | const std::size_t dst_layer_size = Texture::CalculateSize( |
| 122 | regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr, | 123 | true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1, |
| 123 | regs.src_params.BlockHeight(), regs.src_params.pos_x, | 124 | regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); |
| 124 | regs.src_params.pos_y); | ||
| 125 | } else { | ||
| 126 | ASSERT(regs.dst_params.size_z == 1); | ||
| 127 | ASSERT(regs.src_pitch == regs.x_count); | ||
| 128 | 125 | ||
| 129 | const u32 src_bpp = regs.src_pitch / regs.x_count; | 126 | const std::size_t src_size = regs.src_pitch * regs.y_count; |
| 130 | 127 | ||
| 131 | FlushAndInvalidate(regs.src_pitch * regs.y_count, | 128 | if (read_buffer.size() < src_size) { |
| 132 | regs.dst_params.size_x * regs.dst_params.size_y * src_bpp); | 129 | read_buffer.resize(src_size); |
| 130 | } | ||
| 131 | |||
| 132 | if (write_buffer.size() < dst_size) { | ||
| 133 | write_buffer.resize(dst_size); | ||
| 134 | } | ||
| 135 | |||
| 136 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); | ||
| 137 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | ||
| 133 | 138 | ||
| 134 | // If the input is linear and the output is tiled, swizzle the input and copy it over. | 139 | // If the input is linear and the output is tiled, swizzle the input and copy it over. |
| 135 | Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, | 140 | Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, |
| 136 | src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight()); | 141 | src_bytes_per_pixel, |
| 142 | write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, | ||
| 143 | read_buffer.data(), regs.dst_params.BlockHeight()); | ||
| 144 | |||
| 145 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); | ||
| 137 | } | 146 | } |
| 138 | } | 147 | } |
| 139 | 148 | ||
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index c6b649842..e5942f671 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstddef> | 8 | #include <cstddef> |
| 9 | #include <vector> | ||
| 9 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 10 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| 11 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| @@ -25,6 +26,11 @@ class RasterizerInterface; | |||
| 25 | 26 | ||
| 26 | namespace Tegra::Engines { | 27 | namespace Tegra::Engines { |
| 27 | 28 | ||
| 29 | /** | ||
| 30 | * This Engine is known as GK104_Copy. Documentation can be found in: | ||
| 31 | * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml | ||
| 32 | */ | ||
| 33 | |||
| 28 | class MaxwellDMA final { | 34 | class MaxwellDMA final { |
| 29 | public: | 35 | public: |
| 30 | explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 36 | explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |
| @@ -63,6 +69,16 @@ public: | |||
| 63 | 69 | ||
| 64 | static_assert(sizeof(Parameters) == 24, "Parameters has wrong size"); | 70 | static_assert(sizeof(Parameters) == 24, "Parameters has wrong size"); |
| 65 | 71 | ||
| 72 | enum class ComponentMode : u32 { | ||
| 73 | Src0 = 0, | ||
| 74 | Src1 = 1, | ||
| 75 | Src2 = 2, | ||
| 76 | Src3 = 3, | ||
| 77 | Const0 = 4, | ||
| 78 | Const1 = 5, | ||
| 79 | Zero = 6, | ||
| 80 | }; | ||
| 81 | |||
| 66 | enum class CopyMode : u32 { | 82 | enum class CopyMode : u32 { |
| 67 | None = 0, | 83 | None = 0, |
| 68 | Unk1 = 1, | 84 | Unk1 = 1, |
| @@ -128,7 +144,26 @@ public: | |||
| 128 | u32 x_count; | 144 | u32 x_count; |
| 129 | u32 y_count; | 145 | u32 y_count; |
| 130 | 146 | ||
| 131 | INSERT_PADDING_WORDS(0xBB); | 147 | INSERT_PADDING_WORDS(0xB8); |
| 148 | |||
| 149 | u32 const0; | ||
| 150 | u32 const1; | ||
| 151 | union { | ||
| 152 | BitField<0, 4, ComponentMode> component0; | ||
| 153 | BitField<4, 4, ComponentMode> component1; | ||
| 154 | BitField<8, 4, ComponentMode> component2; | ||
| 155 | BitField<12, 4, ComponentMode> component3; | ||
| 156 | BitField<16, 2, u32> component_size; | ||
| 157 | BitField<20, 3, u32> src_num_components; | ||
| 158 | BitField<24, 3, u32> dst_num_components; | ||
| 159 | |||
| 160 | u32 SrcBytePerPixel() const { | ||
| 161 | return src_num_components.Value() * component_size.Value(); | ||
| 162 | } | ||
| 163 | u32 DstBytePerPixel() const { | ||
| 164 | return dst_num_components.Value() * component_size.Value(); | ||
| 165 | } | ||
| 166 | } swizzle_config; | ||
| 132 | 167 | ||
| 133 | Parameters dst_params; | 168 | Parameters dst_params; |
| 134 | 169 | ||
| @@ -149,6 +184,9 @@ private: | |||
| 149 | 184 | ||
| 150 | MemoryManager& memory_manager; | 185 | MemoryManager& memory_manager; |
| 151 | 186 | ||
| 187 | std::vector<u8> read_buffer; | ||
| 188 | std::vector<u8> write_buffer; | ||
| 189 | |||
| 152 | /// Performs the copy from the source buffer to the destination buffer as configured in the | 190 | /// Performs the copy from the source buffer to the destination buffer as configured in the |
| 153 | /// registers. | 191 | /// registers. |
| 154 | void HandleCopy(); | 192 | void HandleCopy(); |
| @@ -165,6 +203,9 @@ ASSERT_REG_POSITION(src_pitch, 0x104); | |||
| 165 | ASSERT_REG_POSITION(dst_pitch, 0x105); | 203 | ASSERT_REG_POSITION(dst_pitch, 0x105); |
| 166 | ASSERT_REG_POSITION(x_count, 0x106); | 204 | ASSERT_REG_POSITION(x_count, 0x106); |
| 167 | ASSERT_REG_POSITION(y_count, 0x107); | 205 | ASSERT_REG_POSITION(y_count, 0x107); |
| 206 | ASSERT_REG_POSITION(const0, 0x1C0); | ||
| 207 | ASSERT_REG_POSITION(const1, 0x1C1); | ||
| 208 | ASSERT_REG_POSITION(swizzle_config, 0x1C2); | ||
| 168 | ASSERT_REG_POSITION(dst_params, 0x1C3); | 209 | ASSERT_REG_POSITION(dst_params, 0x1C3); |
| 169 | ASSERT_REG_POSITION(src_params, 0x1CA); | 210 | ASSERT_REG_POSITION(src_params, 0x1CA); |
| 170 | 211 | ||
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 4461083ff..52706505b 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -35,9 +35,9 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren | |||
| 35 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); | 35 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); |
| 36 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); | 36 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); |
| 37 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); | 37 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); |
| 38 | kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager); | 38 | kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); |
| 39 | maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager); | 39 | maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager); |
| 40 | kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager); | 40 | kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); |
| 41 | } | 41 | } |
| 42 | 42 | ||
| 43 | GPU::~GPU() = default; | 43 | GPU::~GPU() = default; |