diff options
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 40 |
| -rw-r--r-- | src/video_core/engines/kepler_memory.h | 24 |
2 files changed, 48 insertions, 16 deletions
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index cd51a31d7..3ed28f4a7 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
| @@ -10,6 +10,8 @@ | |||
| 10 | #include "video_core/memory_manager.h" | 10 | #include "video_core/memory_manager.h" |
| 11 | #include "video_core/rasterizer_interface.h" | 11 | #include "video_core/rasterizer_interface.h" |
| 12 | #include "video_core/renderer_base.h" | 12 | #include "video_core/renderer_base.h" |
| 13 | #include "video_core/textures/convert.h" | ||
| 14 | #include "video_core/textures/decoders.h" | ||
| 13 | 15 | ||
| 14 | namespace Tegra::Engines { | 16 | namespace Tegra::Engines { |
| 15 | 17 | ||
| @@ -27,30 +29,40 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { | |||
| 27 | 29 | ||
| 28 | switch (method_call.method) { | 30 | switch (method_call.method) { |
| 29 | case KEPLERMEMORY_REG_INDEX(exec): { | 31 | case KEPLERMEMORY_REG_INDEX(exec): { |
| 30 | state.write_offset = 0; | 32 | ProcessExec(); |
| 31 | break; | 33 | break; |
| 32 | } | 34 | } |
| 33 | case KEPLERMEMORY_REG_INDEX(data): { | 35 | case KEPLERMEMORY_REG_INDEX(data): { |
| 34 | ProcessData(method_call.argument); | 36 | ProcessData(method_call.argument, method_call.IsLastCall()); |
| 35 | break; | 37 | break; |
| 36 | } | 38 | } |
| 37 | } | 39 | } |
| 38 | } | 40 | } |
| 39 | 41 | ||
| 40 | void KeplerMemory::ProcessData(u32 data) { | 42 | void KeplerMemory::ProcessExec() { |
| 41 | ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported"); | 43 | state.write_offset = 0; |
| 42 | ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0); | 44 | state.copy_size = regs.line_length_in * regs.line_count; |
| 43 | 45 | state.inner_buffer.resize(state.copy_size); | |
| 44 | // We have to invalidate the destination region to evict any outdated surfaces from the cache. | 46 | } |
| 45 | // We do this before actually writing the new data because the destination address might | ||
| 46 | // contain a dirty surface that will have to be written back to memory. | ||
| 47 | const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)}; | ||
| 48 | rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32)); | ||
| 49 | memory_manager.Write<u32>(address, data); | ||
| 50 | 47 | ||
| 51 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 48 | void KeplerMemory::ProcessData(u32 data, bool is_last_call) { |
| 49 | const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset); | ||
| 50 | std::memcpy(&state.inner_buffer[state.write_offset], &data, sub_copy_size); | ||
| 51 | state.write_offset += sub_copy_size; | ||
| 52 | if (is_last_call) { | ||
| 53 | UNIMPLEMENTED_IF_MSG(regs.exec.linear == 0, "Block Linear Copy is not implemented"); | ||
| 54 | if (regs.exec.linear != 0) { | ||
| 55 | const GPUVAddr address{regs.dest.Address()}; | ||
| 56 | const auto host_ptr = memory_manager.GetPointer(address); | ||
| 57 | // We have to invalidate the destination region to evict any outdated surfaces from the | ||
| 58 | // cache. We do this before actually writing the new data because the destination | ||
| 59 | // address might contain a dirty surface that will have to be written back to memory. | ||
| 52 | 60 | ||
| 53 | state.write_offset++; | 61 | rasterizer.InvalidateRegion(ToCacheAddr(host_ptr), state.copy_size); |
| 62 | std::memcpy(host_ptr, state.inner_buffer.data(), state.copy_size); | ||
| 63 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||
| 64 | } | ||
| 65 | } | ||
| 54 | } | 66 | } |
| 55 | 67 | ||
| 56 | } // namespace Tegra::Engines | 68 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 78b6c3e45..5f892ddad 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstddef> | 8 | #include <cstddef> |
| 9 | #include <vector> | ||
| 9 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 10 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| 11 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| @@ -51,7 +52,11 @@ public: | |||
| 51 | u32 address_high; | 52 | u32 address_high; |
| 52 | u32 address_low; | 53 | u32 address_low; |
| 53 | u32 pitch; | 54 | u32 pitch; |
| 54 | u32 block_dimensions; | 55 | union { |
| 56 | BitField<0, 4, u32> block_width; | ||
| 57 | BitField<4, 4, u32> block_height; | ||
| 58 | BitField<8, 4, u32> block_depth; | ||
| 59 | }; | ||
| 55 | u32 width; | 60 | u32 width; |
| 56 | u32 height; | 61 | u32 height; |
| 57 | u32 depth; | 62 | u32 depth; |
| @@ -63,6 +68,18 @@ public: | |||
| 63 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | 68 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | |
| 64 | address_low); | 69 | address_low); |
| 65 | } | 70 | } |
| 71 | |||
| 72 | u32 BlockWidth() const { | ||
| 73 | return 1U << block_width.Value(); | ||
| 74 | } | ||
| 75 | |||
| 76 | u32 BlockHeight() const { | ||
| 77 | return 1U << block_height.Value(); | ||
| 78 | } | ||
| 79 | |||
| 80 | u32 BlockDepth() const { | ||
| 81 | return 1U << block_depth.Value(); | ||
| 82 | } | ||
| 66 | } dest; | 83 | } dest; |
| 67 | 84 | ||
| 68 | struct { | 85 | struct { |
| @@ -81,6 +98,8 @@ public: | |||
| 81 | 98 | ||
| 82 | struct { | 99 | struct { |
| 83 | u32 write_offset = 0; | 100 | u32 write_offset = 0; |
| 101 | u32 copy_size = 0; | ||
| 102 | std::vector<u8> inner_buffer; | ||
| 84 | } state{}; | 103 | } state{}; |
| 85 | 104 | ||
| 86 | private: | 105 | private: |
| @@ -88,7 +107,8 @@ private: | |||
| 88 | VideoCore::RasterizerInterface& rasterizer; | 107 | VideoCore::RasterizerInterface& rasterizer; |
| 89 | MemoryManager& memory_manager; | 108 | MemoryManager& memory_manager; |
| 90 | 109 | ||
| 91 | void ProcessData(u32 data); | 110 | void ProcessExec(); |
| 111 | void ProcessData(u32 data, bool is_last_call); | ||
| 92 | }; | 112 | }; |
| 93 | 113 | ||
| 94 | #define ASSERT_REG_POSITION(field_name, position) \ | 114 | #define ASSERT_REG_POSITION(field_name, position) \ |