summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/video_core/engines/kepler_memory.cpp 19
-rw-r--r-- src/video_core/textures/decoders.cpp 21
-rw-r--r-- src/video_core/textures/decoders.h 3
3 files changed, 38 insertions, 5 deletions
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 3ed28f4a7..4df19c1f5 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -10,7 +10,6 @@
10#include "video_core/memory_manager.h" 10#include "video_core/memory_manager.h"
11#include "video_core/rasterizer_interface.h" 11#include "video_core/rasterizer_interface.h"
12#include "video_core/renderer_base.h" 12#include "video_core/renderer_base.h"
13#include "video_core/textures/convert.h"
14#include "video_core/textures/decoders.h" 13#include "video_core/textures/decoders.h"
15 14
16namespace Tegra::Engines { 15namespace Tegra::Engines {
@@ -47,13 +46,12 @@ void KeplerMemory::ProcessExec() {
47 46
48void KeplerMemory::ProcessData(u32 data, bool is_last_call) { 47void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
49 const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset); 48 const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
50 std::memcpy(&state.inner_buffer[state.write_offset], &data, sub_copy_size); 49 std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size);
51 state.write_offset += sub_copy_size; 50 state.write_offset += sub_copy_size;
52 if (is_last_call) { 51 if (is_last_call) {
53 UNIMPLEMENTED_IF_MSG(regs.exec.linear == 0, "Block Linear Copy is not implemented"); 52 const GPUVAddr address{regs.dest.Address()};
53 const auto host_ptr = memory_manager.GetPointer(address);
54 if (regs.exec.linear != 0) { 54 if (regs.exec.linear != 0) {
55 const GPUVAddr address{regs.dest.Address()};
56 const auto host_ptr = memory_manager.GetPointer(address);
57 // We have to invalidate the destination region to evict any outdated surfaces from the 55 // We have to invalidate the destination region to evict any outdated surfaces from the
58 // cache. We do this before actually writing the new data because the destination 56 // cache. We do this before actually writing the new data because the destination
59 // address might contain a dirty surface that will have to be written back to memory. 57 // address might contain a dirty surface that will have to be written back to memory.
@@ -61,6 +59,17 @@ void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
61 rasterizer.InvalidateRegion(ToCacheAddr(host_ptr), state.copy_size); 59 rasterizer.InvalidateRegion(ToCacheAddr(host_ptr), state.copy_size);
62 std::memcpy(host_ptr, state.inner_buffer.data(), state.copy_size); 60 std::memcpy(host_ptr, state.inner_buffer.data(), state.copy_size);
63 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 61 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
62 } else {
63 UNIMPLEMENTED_IF(regs.dest.z != 0);
64 UNIMPLEMENTED_IF(regs.dest.depth != 1);
65 UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
66 UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
67 const std::size_t dst_size = Tegra::Texture::CalculateSize(
68 true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
69 rasterizer.InvalidateRegion(ToCacheAddr(host_ptr), dst_size);
70 Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
71 regs.dest.y, regs.dest.BlockHeight(), state.copy_size,
72 state.inner_buffer.data(), host_ptr);
64 } 73 }
65 } 74 }
66} 75}
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 995d0e068..6e02a6407 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -288,6 +288,27 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
288 } 288 }
289} 289}
290 290
291void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
292 std::size_t copy_size, u8* source_data, u8* swizzle_data) {
293 const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x};
294 std::size_t count = 0;
295 for (u32 y = dst_y; y < height && count < copy_size; ++y) {
296 const u32 gob_address_y =
297 (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs +
298 ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size;
299 const auto& table = legacy_swizzle_table[y % gob_size_y];
300 for (u32 x = dst_x; x < width && count < copy_size; ++x) {
301 const u32 gob_address = gob_address_y + (x / gob_size_x) * gob_size * block_height;
302 const u32 swizzled_offset = gob_address + table[x % gob_size_x];
303 const u8* source_line = source_data + count;
304 u8* dest_addr = swizzle_data + swizzled_offset;
305 count++;
306
307 std::memcpy(dest_addr, source_line, 1);
308 }
309 }
310}
311
291std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, 312std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
292 u32 height) { 313 u32 height) {
293 std::vector<u8> rgba_data; 314 std::vector<u8> rgba_data;
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index e078fa274..21d4b37fc 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -51,4 +51,7 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
51 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, 51 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
52 u32 offset_x, u32 offset_y); 52 u32 offset_x, u32 offset_y);
53 53
54void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
55 std::size_t copy_size, u8* source_data, u8* swizzle_data);
56
54} // namespace Tegra::Texture 57} // namespace Tegra::Texture