diff options
| author | 2019-04-22 16:47:05 -0400 | |
|---|---|---|
| committer | 2019-04-22 16:47:05 -0400 | |
| commit | 01100f8afdc22cfcc0b94b86b7e352a41120b8d9 (patch) | |
| tree | 40dc11e843f60d886ff7d4ddecab694cc4ce6451 /src | |
| parent | Merge pull request #2407 from FernandoS27/f2f (diff) | |
| parent | Apply Const correctness to SwizzleKepler and replace u32 for size_t on iterat... (diff) | |
| download | yuzu-01100f8afdc22cfcc0b94b86b7e352a41120b8d9.tar.gz yuzu-01100f8afdc22cfcc0b94b86b7e352a41120b8d9.tar.xz yuzu-01100f8afdc22cfcc0b94b86b7e352a41120b8d9.zip | |
Merge pull request #2400 from FernandoS27/corret-kepler-mem
Implement Kepler Memory on both Linear and BlockLinear.
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 47 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.h | 24 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 23 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.h | 4 |
4 files changed, 81 insertions, 17 deletions
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index cd51a31d7..7387886a3 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "video_core/memory_manager.h" | 10 | #include "video_core/memory_manager.h" |
| 11 | #include "video_core/rasterizer_interface.h" | 11 | #include "video_core/rasterizer_interface.h" |
| 12 | #include "video_core/renderer_base.h" | 12 | #include "video_core/renderer_base.h" |
| 13 | #include "video_core/textures/decoders.h" | ||
| 13 | 14 | ||
| 14 | namespace Tegra::Engines { | 15 | namespace Tegra::Engines { |
| 15 | 16 | ||
| @@ -27,30 +28,46 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { | |||
| 27 | 28 | ||
| 28 | switch (method_call.method) { | 29 | switch (method_call.method) { |
| 29 | case KEPLERMEMORY_REG_INDEX(exec): { | 30 | case KEPLERMEMORY_REG_INDEX(exec): { |
| 30 | state.write_offset = 0; | 31 | ProcessExec(); |
| 31 | break; | 32 | break; |
| 32 | } | 33 | } |
| 33 | case KEPLERMEMORY_REG_INDEX(data): { | 34 | case KEPLERMEMORY_REG_INDEX(data): { |
| 34 | ProcessData(method_call.argument); | 35 | ProcessData(method_call.argument, method_call.IsLastCall()); |
| 35 | break; | 36 | break; |
| 36 | } | 37 | } |
| 37 | } | 38 | } |
| 38 | } | 39 | } |
| 39 | 40 | ||
| 40 | void KeplerMemory::ProcessData(u32 data) { | 41 | void KeplerMemory::ProcessExec() { |
| 41 | ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported"); | 42 | state.write_offset = 0; |
| 42 | ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0); | 43 | state.copy_size = regs.line_length_in * regs.line_count; |
| 43 | 44 | state.inner_buffer.resize(state.copy_size); | |
| 44 | // We have to invalidate the destination region to evict any outdated surfaces from the cache. | 45 | } |
| 45 | // We do this before actually writing the new data because the destination address might | ||
| 46 | // contain a dirty surface that will have to be written back to memory. | ||
| 47 | const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)}; | ||
| 48 | rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32)); | ||
| 49 | memory_manager.Write<u32>(address, data); | ||
| 50 | |||
| 51 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 52 | 46 | ||
| 53 | state.write_offset++; | 47 | void KeplerMemory::ProcessData(u32 data, bool is_last_call) { |
| 48 | const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset); | ||
| 49 | std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size); | ||
| 50 | state.write_offset += sub_copy_size; | ||
| 51 | if (is_last_call) { | ||
| 52 | const GPUVAddr address{regs.dest.Address()}; | ||
| 53 | if (regs.exec.linear != 0) { | ||
| 54 | memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size); | ||
| 55 | } else { | ||
| 56 | UNIMPLEMENTED_IF(regs.dest.z != 0); | ||
| 57 | UNIMPLEMENTED_IF(regs.dest.depth != 1); | ||
| 58 | UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); | ||
| 59 | UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); | ||
| 60 | const std::size_t dst_size = Tegra::Texture::CalculateSize( | ||
| 61 | true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); | ||
| 62 | std::vector<u8> tmp_buffer(dst_size); | ||
| 63 | memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); | ||
| 64 | Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, | ||
| 65 | regs.dest.y, regs.dest.BlockHeight(), state.copy_size, | ||
| 66 | state.inner_buffer.data(), tmp_buffer.data()); | ||
| 67 | memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); | ||
| 68 | } | ||
| 69 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 70 | } | ||
| 54 | } | 71 | } |
| 55 | 72 | ||
| 56 | } // namespace Tegra::Engines | 73 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index 78b6c3e45..5f892ddad 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstddef> | 8 | #include <cstddef> |
| 9 | #include <vector> | ||
| 9 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 10 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| 11 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| @@ -51,7 +52,11 @@ public: | |||
| 51 | u32 address_high; | 52 | u32 address_high; |
| 52 | u32 address_low; | 53 | u32 address_low; |
| 53 | u32 pitch; | 54 | u32 pitch; |
| 54 | u32 block_dimensions; | 55 | union { |
| 56 | BitField<0, 4, u32> block_width; | ||
| 57 | BitField<4, 4, u32> block_height; | ||
| 58 | BitField<8, 4, u32> block_depth; | ||
| 59 | }; | ||
| 55 | u32 width; | 60 | u32 width; |
| 56 | u32 height; | 61 | u32 height; |
| 57 | u32 depth; | 62 | u32 depth; |
| @@ -63,6 +68,18 @@ public: | |||
| 63 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | 68 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | |
| 64 | address_low); | 69 | address_low); |
| 65 | } | 70 | } |
| 71 | |||
| 72 | u32 BlockWidth() const { | ||
| 73 | return 1U << block_width.Value(); | ||
| 74 | } | ||
| 75 | |||
| 76 | u32 BlockHeight() const { | ||
| 77 | return 1U << block_height.Value(); | ||
| 78 | } | ||
| 79 | |||
| 80 | u32 BlockDepth() const { | ||
| 81 | return 1U << block_depth.Value(); | ||
| 82 | } | ||
| 66 | } dest; | 83 | } dest; |
| 67 | 84 | ||
| 68 | struct { | 85 | struct { |
| @@ -81,6 +98,8 @@ public: | |||
| 81 | 98 | ||
| 82 | struct { | 99 | struct { |
| 83 | u32 write_offset = 0; | 100 | u32 write_offset = 0; |
| 101 | u32 copy_size = 0; | ||
| 102 | std::vector<u8> inner_buffer; | ||
| 84 | } state{}; | 103 | } state{}; |
| 85 | 104 | ||
| 86 | private: | 105 | private: |
| @@ -88,7 +107,8 @@ private: | |||
| 88 | VideoCore::RasterizerInterface& rasterizer; | 107 | VideoCore::RasterizerInterface& rasterizer; |
| 89 | MemoryManager& memory_manager; | 108 | MemoryManager& memory_manager; |
| 90 | 109 | ||
| 91 | void ProcessData(u32 data); | 110 | void ProcessExec(); |
| 111 | void ProcessData(u32 data, bool is_last_call); | ||
| 92 | }; | 112 | }; |
| 93 | 113 | ||
| 94 | #define ASSERT_REG_POSITION(field_name, position) \ | 114 | #define ASSERT_REG_POSITION(field_name, position) \ |
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 995d0e068..217805386 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -288,6 +288,29 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||
| 288 | } | 288 | } |
| 289 | } | 289 | } |
| 290 | 290 | ||
| 291 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, | ||
| 292 | const u32 block_height, const std::size_t copy_size, const u8* source_data, | ||
| 293 | u8* swizzle_data) { | ||
| 294 | const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; | ||
| 295 | std::size_t count = 0; | ||
| 296 | for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { | ||
| 297 | const std::size_t gob_address_y = | ||
| 298 | (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + | ||
| 299 | ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size; | ||
| 300 | const auto& table = legacy_swizzle_table[y % gob_size_y]; | ||
| 301 | for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { | ||
| 302 | const std::size_t gob_address = | ||
| 303 | gob_address_y + (x / gob_size_x) * gob_size * block_height; | ||
| 304 | const std::size_t swizzled_offset = gob_address + table[x % gob_size_x]; | ||
| 305 | const u8* source_line = source_data + count; | ||
| 306 | u8* dest_addr = swizzle_data + swizzled_offset; | ||
| 307 | count++; | ||
| 308 | |||
| 309 | std::memcpy(dest_addr, source_line, 1); | ||
| 310 | } | ||
| 311 | } | ||
| 312 | } | ||
| 313 | |||
| 291 | std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, | 314 | std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, |
| 292 | u32 height) { | 315 | u32 height) { |
| 293 | std::vector<u8> rgba_data; | 316 | std::vector<u8> rgba_data; |
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index e078fa274..e072d8401 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -51,4 +51,8 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||
| 51 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, | 51 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, |
| 52 | u32 offset_x, u32 offset_y); | 52 | u32 offset_x, u32 offset_y); |
| 53 | 53 | ||
| 54 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, | ||
| 55 | const u32 block_height, const std::size_t copy_size, const u8* source_data, | ||
| 56 | u8* swizzle_data); | ||
| 57 | |||
| 54 | } // namespace Tegra::Texture | 58 | } // namespace Tegra::Texture |