diff options
| author | 2019-04-22 18:50:56 -0400 | |
|---|---|---|
| committer | 2019-04-22 18:50:56 -0400 | |
| commit | a91d3fc6397560fc6294a24faeed73d45abd1753 (patch) | |
| tree | 5f33a0964c33e3ce54520f74d72c0117a5770b15 /src | |
| parent | Merge pull request #2403 from FernandoS27/compressed-linear (diff) | |
| download | yuzu-a91d3fc6397560fc6294a24faeed73d45abd1753.tar.gz yuzu-a91d3fc6397560fc6294a24faeed73d45abd1753.tar.xz yuzu-a91d3fc6397560fc6294a24faeed73d45abd1753.zip | |
Revamp Kepler Memory to use a subengine to manage uploads
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/engines/engine_upload.cpp | 44 | ||||
| -rw-r--r-- | src/video_core/engines/engine_upload.h | 74 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 45 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.h | 60 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 2 |
6 files changed, 134 insertions, 93 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 1e31a2900..7d02ec2d0 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -3,6 +3,8 @@ add_library(video_core STATIC | |||
| 3 | dma_pusher.h | 3 | dma_pusher.h |
| 4 | debug_utils/debug_utils.cpp | 4 | debug_utils/debug_utils.cpp |
| 5 | debug_utils/debug_utils.h | 5 | debug_utils/debug_utils.h |
| 6 | engines/engine_upload.cpp | ||
| 7 | engines/engine_upload.h | ||
| 6 | engines/fermi_2d.cpp | 8 | engines/fermi_2d.cpp |
| 7 | engines/fermi_2d.h | 9 | engines/fermi_2d.h |
| 8 | engines/kepler_compute.cpp | 10 | engines/kepler_compute.cpp |
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp new file mode 100644 index 000000000..201f8e273 --- /dev/null +++ b/src/video_core/engines/engine_upload.cpp | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "video_core/engines/engine_upload.h" | ||
| 7 | #include "video_core/memory_manager.h" | ||
| 8 | #include "video_core/textures/decoders.h" | ||
| 9 | |||
| 10 | namespace Tegra::Engines::Upload { | ||
| 11 | |||
| 12 | void State::ProcessExec(const bool is_linear) { | ||
| 13 | write_offset = 0; | ||
| 14 | copy_size = regs.line_length_in * regs.line_count; | ||
| 15 | inner_buffer.resize(copy_size); | ||
| 16 | linear = is_linear; | ||
| 17 | } | ||
| 18 | |||
| 19 | void State::ProcessData(const u32 data, const bool is_last_call) { | ||
| 20 | const u32 sub_copy_size = std::min(4U, copy_size - write_offset); | ||
| 21 | std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size); | ||
| 22 | write_offset += sub_copy_size; | ||
| 23 | if (is_last_call) { | ||
| 24 | const GPUVAddr address{regs.dest.Address()}; | ||
| 25 | if (linear) { | ||
| 26 | memory_manager.WriteBlock(address, inner_buffer.data(), copy_size); | ||
| 27 | } else { | ||
| 28 | UNIMPLEMENTED_IF(regs.dest.z != 0); | ||
| 29 | UNIMPLEMENTED_IF(regs.dest.depth != 1); | ||
| 30 | UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); | ||
| 31 | UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); | ||
| 32 | const std::size_t dst_size = Tegra::Texture::CalculateSize( | ||
| 33 | true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); | ||
| 34 | std::vector<u8> tmp_buffer(dst_size); | ||
| 35 | memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); | ||
| 36 | Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, | ||
| 37 | regs.dest.y, regs.dest.BlockHeight(), copy_size, | ||
| 38 | inner_buffer.data(), tmp_buffer.data()); | ||
| 39 | memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); | ||
| 40 | } | ||
| 41 | } | ||
| 42 | } | ||
| 43 | |||
| 44 | } // namespace Tegra::Engines::Upload | ||
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h new file mode 100644 index 000000000..3a817140a --- /dev/null +++ b/src/video_core/engines/engine_upload.h | |||
| @@ -0,0 +1,74 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <cstddef> | ||
| 8 | #include <vector> | ||
| 9 | #include "common/bit_field.h" | ||
| 10 | #include "common/common_funcs.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | |||
| 13 | namespace Tegra { | ||
| 14 | class MemoryManager; | ||
| 15 | } | ||
| 16 | |||
| 17 | namespace Tegra::Engines::Upload { | ||
| 18 | |||
| 19 | struct Data { | ||
| 20 | u32 line_length_in; | ||
| 21 | u32 line_count; | ||
| 22 | |||
| 23 | struct { | ||
| 24 | u32 address_high; | ||
| 25 | u32 address_low; | ||
| 26 | u32 pitch; | ||
| 27 | union { | ||
| 28 | BitField<0, 4, u32> block_width; | ||
| 29 | BitField<4, 4, u32> block_height; | ||
| 30 | BitField<8, 4, u32> block_depth; | ||
| 31 | }; | ||
| 32 | u32 width; | ||
| 33 | u32 height; | ||
| 34 | u32 depth; | ||
| 35 | u32 z; | ||
| 36 | u32 x; | ||
| 37 | u32 y; | ||
| 38 | |||
| 39 | GPUVAddr Address() const { | ||
| 40 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); | ||
| 41 | } | ||
| 42 | |||
| 43 | u32 BlockWidth() const { | ||
| 44 | return 1U << block_width.Value(); | ||
| 45 | } | ||
| 46 | |||
| 47 | u32 BlockHeight() const { | ||
| 48 | return 1U << block_height.Value(); | ||
| 49 | } | ||
| 50 | |||
| 51 | u32 BlockDepth() const { | ||
| 52 | return 1U << block_depth.Value(); | ||
| 53 | } | ||
| 54 | } dest; | ||
| 55 | }; | ||
| 56 | |||
| 57 | class State { | ||
| 58 | public: | ||
| 59 | State(MemoryManager& memory_manager, Data& regs) : memory_manager(memory_manager), regs(regs) {} | ||
| 60 | ~State() = default; | ||
| 61 | |||
| 62 | void ProcessExec(const bool is_linear); | ||
| 63 | void ProcessData(const u32 data, const bool is_last_call); | ||
| 64 | |||
| 65 | private: | ||
| 66 | u32 write_offset = 0; | ||
| 67 | u32 copy_size = 0; | ||
| 68 | std::vector<u8> inner_buffer; | ||
| 69 | bool linear; | ||
| 70 | Data& regs; | ||
| 71 | MemoryManager& memory_manager; | ||
| 72 | }; | ||
| 73 | |||
| 74 | } // namespace Tegra::Engines::Upload | ||
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 7387886a3..71fa499d3 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -14,9 +14,8 @@ | |||
| 14 | 14 | ||
| 15 | namespace Tegra::Engines { | 15 | namespace Tegra::Engines { |
| 16 | 16 | ||
| 17 | KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 17 | KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager) |
| 18 | MemoryManager& memory_manager) | 18 | : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {} |
| 19 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {} | ||
| 20 | 19 | ||
| 21 | KeplerMemory::~KeplerMemory() = default; | 20 | KeplerMemory::~KeplerMemory() = default; |
| 22 | 21 | ||
| @@ -28,46 +27,18 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { | |||
| 28 | 27 | ||
| 29 | switch (method_call.method) { | 28 | switch (method_call.method) { |
| 30 | case KEPLERMEMORY_REG_INDEX(exec): { | 29 | case KEPLERMEMORY_REG_INDEX(exec): { |
| 31 | ProcessExec(); | 30 | upload_state.ProcessExec(regs.exec.linear != 0); |
| 32 | break; | 31 | break; |
| 33 | } | 32 | } |
| 34 | case KEPLERMEMORY_REG_INDEX(data): { | 33 | case KEPLERMEMORY_REG_INDEX(data): { |
| 35 | ProcessData(method_call.argument, method_call.IsLastCall()); | 34 | bool is_last_call = method_call.IsLastCall(); |
| 35 | upload_state.ProcessData(method_call.argument, is_last_call); | ||
| 36 | if (is_last_call) { | ||
| 37 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 38 | } | ||
| 36 | break; | 39 | break; |
| 37 | } | 40 | } |
| 38 | } | 41 | } |
| 39 | } | 42 | } |
| 40 | 43 | ||
| 41 | void KeplerMemory::ProcessExec() { | ||
| 42 | state.write_offset = 0; | ||
| 43 | state.copy_size = regs.line_length_in * regs.line_count; | ||
| 44 | state.inner_buffer.resize(state.copy_size); | ||
| 45 | } | ||
| 46 | |||
| 47 | void KeplerMemory::ProcessData(u32 data, bool is_last_call) { | ||
| 48 | const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset); | ||
| 49 | std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size); | ||
| 50 | state.write_offset += sub_copy_size; | ||
| 51 | if (is_last_call) { | ||
| 52 | const GPUVAddr address{regs.dest.Address()}; | ||
| 53 | if (regs.exec.linear != 0) { | ||
| 54 | memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size); | ||
| 55 | } else { | ||
| 56 | UNIMPLEMENTED_IF(regs.dest.z != 0); | ||
| 57 | UNIMPLEMENTED_IF(regs.dest.depth != 1); | ||
| 58 | UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); | ||
| 59 | UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); | ||
| 60 | const std::size_t dst_size = Tegra::Texture::CalculateSize( | ||
| 61 | true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); | ||
| 62 | std::vector<u8> tmp_buffer(dst_size); | ||
| 63 | memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); | ||
| 64 | Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, | ||
| 65 | regs.dest.y, regs.dest.BlockHeight(), state.copy_size, | ||
| 66 | state.inner_buffer.data(), tmp_buffer.data()); | ||
| 67 | memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); | ||
| 68 | } | ||
| 69 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 70 | } | ||
| 71 | } | ||
| 72 | |||
| 73 | } // namespace Tegra::Engines | 44 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index 5f892ddad..c6b738eb9 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 11 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/engines/engine_upload.h" | ||
| 13 | #include "video_core/gpu.h" | 14 | #include "video_core/gpu.h" |
| 14 | 15 | ||
| 15 | namespace Core { | 16 | namespace Core { |
| @@ -20,10 +21,6 @@ namespace Tegra { | |||
| 20 | class MemoryManager; | 21 | class MemoryManager; |
| 21 | } | 22 | } |
| 22 | 23 | ||
| 23 | namespace VideoCore { | ||
| 24 | class RasterizerInterface; | ||
| 25 | } | ||
| 26 | |||
| 27 | namespace Tegra::Engines { | 24 | namespace Tegra::Engines { |
| 28 | 25 | ||
| 29 | #define KEPLERMEMORY_REG_INDEX(field_name) \ | 26 | #define KEPLERMEMORY_REG_INDEX(field_name) \ |
| @@ -31,8 +28,7 @@ namespace Tegra::Engines { | |||
| 31 | 28 | ||
| 32 | class KeplerMemory final { | 29 | class KeplerMemory final { |
| 33 | public: | 30 | public: |
| 34 | KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 31 | KeplerMemory(Core::System& system, MemoryManager& memory_manager); |
| 35 | MemoryManager& memory_manager); | ||
| 36 | ~KeplerMemory(); | 32 | ~KeplerMemory(); |
| 37 | 33 | ||
| 38 | /// Write the value to the register identified by method. | 34 | /// Write the value to the register identified by method. |
| @@ -45,42 +41,7 @@ public: | |||
| 45 | struct { | 41 | struct { |
| 46 | INSERT_PADDING_WORDS(0x60); | 42 | INSERT_PADDING_WORDS(0x60); |
| 47 | 43 | ||
| 48 | u32 line_length_in; | 44 | Upload::Data upload; |
| 49 | u32 line_count; | ||
| 50 | |||
| 51 | struct { | ||
| 52 | u32 address_high; | ||
| 53 | u32 address_low; | ||
| 54 | u32 pitch; | ||
| 55 | union { | ||
| 56 | BitField<0, 4, u32> block_width; | ||
| 57 | BitField<4, 4, u32> block_height; | ||
| 58 | BitField<8, 4, u32> block_depth; | ||
| 59 | }; | ||
| 60 | u32 width; | ||
| 61 | u32 height; | ||
| 62 | u32 depth; | ||
| 63 | u32 z; | ||
| 64 | u32 x; | ||
| 65 | u32 y; | ||
| 66 | |||
| 67 | GPUVAddr Address() const { | ||
| 68 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 69 | address_low); | ||
| 70 | } | ||
| 71 | |||
| 72 | u32 BlockWidth() const { | ||
| 73 | return 1U << block_width.Value(); | ||
| 74 | } | ||
| 75 | |||
| 76 | u32 BlockHeight() const { | ||
| 77 | return 1U << block_height.Value(); | ||
| 78 | } | ||
| 79 | |||
| 80 | u32 BlockDepth() const { | ||
| 81 | return 1U << block_depth.Value(); | ||
| 82 | } | ||
| 83 | } dest; | ||
| 84 | 45 | ||
| 85 | struct { | 46 | struct { |
| 86 | union { | 47 | union { |
| @@ -96,28 +57,17 @@ public: | |||
| 96 | }; | 57 | }; |
| 97 | } regs{}; | 58 | } regs{}; |
| 98 | 59 | ||
| 99 | struct { | ||
| 100 | u32 write_offset = 0; | ||
| 101 | u32 copy_size = 0; | ||
| 102 | std::vector<u8> inner_buffer; | ||
| 103 | } state{}; | ||
| 104 | |||
| 105 | private: | 60 | private: |
| 106 | Core::System& system; | 61 | Core::System& system; |
| 107 | VideoCore::RasterizerInterface& rasterizer; | ||
| 108 | MemoryManager& memory_manager; | 62 | MemoryManager& memory_manager; |
| 109 | 63 | Upload::State upload_state; | |
| 110 | void ProcessExec(); | ||
| 111 | void ProcessData(u32 data, bool is_last_call); | ||
| 112 | }; | 64 | }; |
| 113 | 65 | ||
| 114 | #define ASSERT_REG_POSITION(field_name, position) \ | 66 | #define ASSERT_REG_POSITION(field_name, position) \ |
| 115 | static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \ | 67 | static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \ |
| 116 | "Field " #field_name " has invalid position") | 68 | "Field " #field_name " has invalid position") |
| 117 | 69 | ||
| 118 | ASSERT_REG_POSITION(line_length_in, 0x60); | 70 | ASSERT_REG_POSITION(upload, 0x60); |
| 119 | ASSERT_REG_POSITION(line_count, 0x61); | ||
| 120 | ASSERT_REG_POSITION(dest, 0x62); | ||
| 121 | ASSERT_REG_POSITION(exec, 0x6C); | 71 | ASSERT_REG_POSITION(exec, 0x6C); |
| 122 | ASSERT_REG_POSITION(data, 0x6D); | 72 | ASSERT_REG_POSITION(data, 0x6D); |
| 123 | #undef ASSERT_REG_POSITION | 73 | #undef ASSERT_REG_POSITION |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 4461083ff..9db6e4763 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -37,7 +37,7 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren | |||
| 37 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); | 37 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); |
| 38 | kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager); | 38 | kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager); |
| 39 | maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager); | 39 | maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager); |
| 40 | kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager); | 40 | kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); |
| 41 | } | 41 | } |
| 42 | 42 | ||
| 43 | GPU::~GPU() = default; | 43 | GPU::~GPU() = default; |