diff options
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 19 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 21 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.h | 3 |
3 files changed, 38 insertions, 5 deletions
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 3ed28f4a7..4df19c1f5 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -10,7 +10,6 @@ | |||
| 10 | #include "video_core/memory_manager.h" | 10 | #include "video_core/memory_manager.h" |
| 11 | #include "video_core/rasterizer_interface.h" | 11 | #include "video_core/rasterizer_interface.h" |
| 12 | #include "video_core/renderer_base.h" | 12 | #include "video_core/renderer_base.h" |
| 13 | #include "video_core/textures/convert.h" | ||
| 14 | #include "video_core/textures/decoders.h" | 13 | #include "video_core/textures/decoders.h" |
| 15 | 14 | ||
| 16 | namespace Tegra::Engines { | 15 | namespace Tegra::Engines { |
| @@ -47,13 +46,12 @@ void KeplerMemory::ProcessExec() { | |||
| 47 | 46 | ||
| 48 | void KeplerMemory::ProcessData(u32 data, bool is_last_call) { | 47 | void KeplerMemory::ProcessData(u32 data, bool is_last_call) { |
| 49 | const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset); | 48 | const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset); |
| 50 | std::memcpy(&state.inner_buffer[state.write_offset], &data, sub_copy_size); | 49 | std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size); |
| 51 | state.write_offset += sub_copy_size; | 50 | state.write_offset += sub_copy_size; |
| 52 | if (is_last_call) { | 51 | if (is_last_call) { |
| 53 | UNIMPLEMENTED_IF_MSG(regs.exec.linear == 0, "Block Linear Copy is not implemented"); | 52 | const GPUVAddr address{regs.dest.Address()}; |
| 53 | const auto host_ptr = memory_manager.GetPointer(address); | ||
| 54 | if (regs.exec.linear != 0) { | 54 | if (regs.exec.linear != 0) { |
| 55 | const GPUVAddr address{regs.dest.Address()}; | ||
| 56 | const auto host_ptr = memory_manager.GetPointer(address); | ||
| 57 | // We have to invalidate the destination region to evict any outdated surfaces from the | 55 | // We have to invalidate the destination region to evict any outdated surfaces from the |
| 58 | // cache. We do this before actually writing the new data because the destination | 56 | // cache. We do this before actually writing the new data because the destination |
| 59 | // address might contain a dirty surface that will have to be written back to memory. | 57 | // address might contain a dirty surface that will have to be written back to memory. |
| @@ -61,6 +59,17 @@ void KeplerMemory::ProcessData(u32 data, bool is_last_call) { | |||
| 61 | rasterizer.InvalidateRegion(ToCacheAddr(host_ptr), state.copy_size); | 59 | rasterizer.InvalidateRegion(ToCacheAddr(host_ptr), state.copy_size); |
| 62 | std::memcpy(host_ptr, state.inner_buffer.data(), state.copy_size); | 60 | std::memcpy(host_ptr, state.inner_buffer.data(), state.copy_size); |
| 63 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 61 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); |
| 62 | } else { | ||
| 63 | UNIMPLEMENTED_IF(regs.dest.z != 0); | ||
| 64 | UNIMPLEMENTED_IF(regs.dest.depth != 1); | ||
| 65 | UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); | ||
| 66 | UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); | ||
| 67 | const std::size_t dst_size = Tegra::Texture::CalculateSize( | ||
| 68 | true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); | ||
| 69 | rasterizer.InvalidateRegion(ToCacheAddr(host_ptr), dst_size); | ||
| 70 | Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, | ||
| 71 | regs.dest.y, regs.dest.BlockHeight(), state.copy_size, | ||
| 72 | state.inner_buffer.data(), host_ptr); | ||
| 64 | } | 73 | } |
| 65 | } | 74 | } |
| 66 | } | 75 | } |
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 995d0e068..6e02a6407 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -288,6 +288,27 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||
| 288 | } | 288 | } |
| 289 | } | 289 | } |
| 290 | 290 | ||
| 291 | void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | ||
| 292 | std::size_t copy_size, u8* source_data, u8* swizzle_data) { | ||
| 293 | const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; | ||
| 294 | std::size_t count = 0; | ||
| 295 | for (u32 y = dst_y; y < height && count < copy_size; ++y) { | ||
| 296 | const u32 gob_address_y = | ||
| 297 | (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + | ||
| 298 | ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size; | ||
| 299 | const auto& table = legacy_swizzle_table[y % gob_size_y]; | ||
| 300 | for (u32 x = dst_x; x < width && count < copy_size; ++x) { | ||
| 301 | const u32 gob_address = gob_address_y + (x / gob_size_x) * gob_size * block_height; | ||
| 302 | const u32 swizzled_offset = gob_address + table[x % gob_size_x]; | ||
| 303 | const u8* source_line = source_data + count; | ||
| 304 | u8* dest_addr = swizzle_data + swizzled_offset; | ||
| 305 | count++; | ||
| 306 | |||
| 307 | std::memcpy(dest_addr, source_line, 1); | ||
| 308 | } | ||
| 309 | } | ||
| 310 | } | ||
| 311 | |||
| 291 | std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, | 312 | std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, |
| 292 | u32 height) { | 313 | u32 height) { |
| 293 | std::vector<u8> rgba_data; | 314 | std::vector<u8> rgba_data; |
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index e078fa274..21d4b37fc 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -51,4 +51,7 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||
| 51 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, | 51 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, |
| 52 | u32 offset_x, u32 offset_y); | 52 | u32 offset_x, u32 offset_y); |
| 53 | 53 | ||
| 54 | void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | ||
| 55 | std::size_t copy_size, u8* source_data, u8* swizzle_data); | ||
| 56 | |||
| 54 | } // namespace Tegra::Texture | 57 | } // namespace Tegra::Texture |