summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar bunnei2019-04-22 16:47:05 -0400
committerGravatar GitHub2019-04-22 16:47:05 -0400
commit01100f8afdc22cfcc0b94b86b7e352a41120b8d9 (patch)
tree40dc11e843f60d886ff7d4ddecab694cc4ce6451 /src
parentMerge pull request #2407 from FernandoS27/f2f (diff)
parentApply Const correctness to SwizzleKepler and replace u32 for size_t on iterat... (diff)
downloadyuzu-01100f8afdc22cfcc0b94b86b7e352a41120b8d9.tar.gz
yuzu-01100f8afdc22cfcc0b94b86b7e352a41120b8d9.tar.xz
yuzu-01100f8afdc22cfcc0b94b86b7e352a41120b8d9.zip
Merge pull request #2400 from FernandoS27/corret-kepler-mem
Implement Kepler Memory on both Linear and BlockLinear.
Diffstat (limited to 'src')
-rw-r--r--src/video_core/engines/kepler_memory.cpp47
-rw-r--r--src/video_core/engines/kepler_memory.h24
-rw-r--r--src/video_core/textures/decoders.cpp23
-rw-r--r--src/video_core/textures/decoders.h4
4 files changed, 81 insertions, 17 deletions
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index cd51a31d7..7387886a3 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -10,6 +10,7 @@
10#include "video_core/memory_manager.h" 10#include "video_core/memory_manager.h"
11#include "video_core/rasterizer_interface.h" 11#include "video_core/rasterizer_interface.h"
12#include "video_core/renderer_base.h" 12#include "video_core/renderer_base.h"
13#include "video_core/textures/decoders.h"
13 14
14namespace Tegra::Engines { 15namespace Tegra::Engines {
15 16
@@ -27,30 +28,46 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
27 28
28 switch (method_call.method) { 29 switch (method_call.method) {
29 case KEPLERMEMORY_REG_INDEX(exec): { 30 case KEPLERMEMORY_REG_INDEX(exec): {
30 state.write_offset = 0; 31 ProcessExec();
31 break; 32 break;
32 } 33 }
33 case KEPLERMEMORY_REG_INDEX(data): { 34 case KEPLERMEMORY_REG_INDEX(data): {
34 ProcessData(method_call.argument); 35 ProcessData(method_call.argument, method_call.IsLastCall());
35 break; 36 break;
36 } 37 }
37 } 38 }
38} 39}
39 40
40void KeplerMemory::ProcessData(u32 data) { 41void KeplerMemory::ProcessExec() {
41 ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported"); 42 state.write_offset = 0;
42 ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0); 43 state.copy_size = regs.line_length_in * regs.line_count;
43 44 state.inner_buffer.resize(state.copy_size);
44 // We have to invalidate the destination region to evict any outdated surfaces from the cache. 45}
45 // We do this before actually writing the new data because the destination address might
46 // contain a dirty surface that will have to be written back to memory.
47 const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
48 rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
49 memory_manager.Write<u32>(address, data);
50
51 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
52 46
53 state.write_offset++; 47void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
48 const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
49 std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size);
50 state.write_offset += sub_copy_size;
51 if (is_last_call) {
52 const GPUVAddr address{regs.dest.Address()};
53 if (regs.exec.linear != 0) {
54 memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size);
55 } else {
56 UNIMPLEMENTED_IF(regs.dest.z != 0);
57 UNIMPLEMENTED_IF(regs.dest.depth != 1);
58 UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1);
59 UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1);
60 const std::size_t dst_size = Tegra::Texture::CalculateSize(
61 true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1);
62 std::vector<u8> tmp_buffer(dst_size);
63 memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
64 Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x,
65 regs.dest.y, regs.dest.BlockHeight(), state.copy_size,
66 state.inner_buffer.data(), tmp_buffer.data());
67 memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
68 }
69 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
70 }
54} 71}
55 72
56} // namespace Tegra::Engines 73} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 78b6c3e45..5f892ddad 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -6,6 +6,7 @@
6 6
7#include <array> 7#include <array>
8#include <cstddef> 8#include <cstddef>
9#include <vector>
9#include "common/bit_field.h" 10#include "common/bit_field.h"
10#include "common/common_funcs.h" 11#include "common/common_funcs.h"
11#include "common/common_types.h" 12#include "common/common_types.h"
@@ -51,7 +52,11 @@ public:
51 u32 address_high; 52 u32 address_high;
52 u32 address_low; 53 u32 address_low;
53 u32 pitch; 54 u32 pitch;
54 u32 block_dimensions; 55 union {
56 BitField<0, 4, u32> block_width;
57 BitField<4, 4, u32> block_height;
58 BitField<8, 4, u32> block_depth;
59 };
55 u32 width; 60 u32 width;
56 u32 height; 61 u32 height;
57 u32 depth; 62 u32 depth;
@@ -63,6 +68,18 @@ public:
63 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | 68 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
64 address_low); 69 address_low);
65 } 70 }
71
72 u32 BlockWidth() const {
73 return 1U << block_width.Value();
74 }
75
76 u32 BlockHeight() const {
77 return 1U << block_height.Value();
78 }
79
80 u32 BlockDepth() const {
81 return 1U << block_depth.Value();
82 }
66 } dest; 83 } dest;
67 84
68 struct { 85 struct {
@@ -81,6 +98,8 @@ public:
81 98
82 struct { 99 struct {
83 u32 write_offset = 0; 100 u32 write_offset = 0;
101 u32 copy_size = 0;
102 std::vector<u8> inner_buffer;
84 } state{}; 103 } state{};
85 104
86private: 105private:
@@ -88,7 +107,8 @@ private:
88 VideoCore::RasterizerInterface& rasterizer; 107 VideoCore::RasterizerInterface& rasterizer;
89 MemoryManager& memory_manager; 108 MemoryManager& memory_manager;
90 109
91 void ProcessData(u32 data); 110 void ProcessExec();
111 void ProcessData(u32 data, bool is_last_call);
92}; 112};
93 113
94#define ASSERT_REG_POSITION(field_name, position) \ 114#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 995d0e068..217805386 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -288,6 +288,29 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
288 } 288 }
289} 289}
290 290
291void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y,
292 const u32 block_height, const std::size_t copy_size, const u8* source_data,
293 u8* swizzle_data) {
294 const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x};
295 std::size_t count = 0;
296 for (std::size_t y = dst_y; y < height && count < copy_size; ++y) {
297 const std::size_t gob_address_y =
298 (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs +
299 ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size;
300 const auto& table = legacy_swizzle_table[y % gob_size_y];
301 for (std::size_t x = dst_x; x < width && count < copy_size; ++x) {
302 const std::size_t gob_address =
303 gob_address_y + (x / gob_size_x) * gob_size * block_height;
304 const std::size_t swizzled_offset = gob_address + table[x % gob_size_x];
305 const u8* source_line = source_data + count;
306 u8* dest_addr = swizzle_data + swizzled_offset;
307 count++;
308
309 std::memcpy(dest_addr, source_line, 1);
310 }
311 }
312}
313
291std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, 314std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
292 u32 height) { 315 u32 height) {
293 std::vector<u8> rgba_data; 316 std::vector<u8> rgba_data;
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index e078fa274..e072d8401 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -51,4 +51,8 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
51 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, 51 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
52 u32 offset_x, u32 offset_y); 52 u32 offset_x, u32 offset_y);
53 53
54void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y,
55 const u32 block_height, const std::size_t copy_size, const u8* source_data,
56 u8* swizzle_data);
57
54} // namespace Tegra::Texture 58} // namespace Tegra::Texture