summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2018-11-24 23:53:17 -0300
committerGravatar ReinUsesLisp2018-11-25 00:37:18 -0300
commitdad3a6718e942862b2e6b98a9bea3fa3b0fdeb5d (patch)
tree9347c330e837eecb8ee39a5b76de7dbdc635ddad /src
parentMerge pull request #1641 from DarkLordZach/sm-register-unregister (diff)
downloadyuzu-dad3a6718e942862b2e6b98a9bea3fa3b0fdeb5d.tar.gz
yuzu-dad3a6718e942862b2e6b98a9bea3fa3b0fdeb5d.tar.xz
yuzu-dad3a6718e942862b2e6b98a9bea3fa3b0fdeb5d.zip
video_core: Move morton functions to their own file
Diffstat (limited to 'src')
-rw-r--r--src/video_core/CMakeLists.txt3
-rw-r--r--src/video_core/morton.cpp353
-rw-r--r--src/video_core/morton.h21
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp193
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp2
-rw-r--r--src/video_core/utils.h164
6 files changed, 391 insertions, 345 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index a780215c1..3f906a517 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -21,6 +21,8 @@ add_library(video_core STATIC
21 macro_interpreter.h 21 macro_interpreter.h
22 memory_manager.cpp 22 memory_manager.cpp
23 memory_manager.h 23 memory_manager.h
24 morton.cpp
25 morton.h
24 rasterizer_cache.cpp 26 rasterizer_cache.cpp
25 rasterizer_cache.h 27 rasterizer_cache.h
26 rasterizer_interface.h 28 rasterizer_interface.h
@@ -62,7 +64,6 @@ add_library(video_core STATIC
62 textures/decoders.cpp 64 textures/decoders.cpp
63 textures/decoders.h 65 textures/decoders.h
64 textures/texture.h 66 textures/texture.h
65 utils.h
66 video_core.cpp 67 video_core.cpp
67 video_core.h 68 video_core.h
68) 69)
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
new file mode 100644
index 000000000..3dd0e4754
--- /dev/null
+++ b/src/video_core/morton.cpp
@@ -0,0 +1,353 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <cstring>
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "core/memory.h"
10#include "video_core/morton.h"
11#include "video_core/surface.h"
12#include "video_core/textures/decoders.h"
13
14namespace VideoCore {
15
16using Surface::GetBytesPerPixel;
17using Surface::PixelFormat;
18
19using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr);
20using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
21
22template <bool morton_to_linear, PixelFormat format>
23static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
24 u8* buffer, std::size_t buffer_size, VAddr addr) {
25 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
26
27 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
28 // pixel values.
29 const u32 tile_size_x{GetDefaultBlockWidth(format)};
30 const u32 tile_size_y{GetDefaultBlockHeight(format)};
31
32 if (morton_to_linear) {
33 Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
34 stride, height, depth, block_height, block_depth);
35 } else {
36 Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
37 (height + tile_size_y - 1) / tile_size_y, depth,
38 bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr),
39 buffer, false, block_height, block_depth);
40 }
41}
42
43static constexpr ConversionArray morton_to_linear_fns = {
44 // clang-format off
45 MortonCopy<true, PixelFormat::ABGR8U>,
46 MortonCopy<true, PixelFormat::ABGR8S>,
47 MortonCopy<true, PixelFormat::ABGR8UI>,
48 MortonCopy<true, PixelFormat::B5G6R5U>,
49 MortonCopy<true, PixelFormat::A2B10G10R10U>,
50 MortonCopy<true, PixelFormat::A1B5G5R5U>,
51 MortonCopy<true, PixelFormat::R8U>,
52 MortonCopy<true, PixelFormat::R8UI>,
53 MortonCopy<true, PixelFormat::RGBA16F>,
54 MortonCopy<true, PixelFormat::RGBA16U>,
55 MortonCopy<true, PixelFormat::RGBA16UI>,
56 MortonCopy<true, PixelFormat::R11FG11FB10F>,
57 MortonCopy<true, PixelFormat::RGBA32UI>,
58 MortonCopy<true, PixelFormat::DXT1>,
59 MortonCopy<true, PixelFormat::DXT23>,
60 MortonCopy<true, PixelFormat::DXT45>,
61 MortonCopy<true, PixelFormat::DXN1>,
62 MortonCopy<true, PixelFormat::DXN2UNORM>,
63 MortonCopy<true, PixelFormat::DXN2SNORM>,
64 MortonCopy<true, PixelFormat::BC7U>,
65 MortonCopy<true, PixelFormat::BC6H_UF16>,
66 MortonCopy<true, PixelFormat::BC6H_SF16>,
67 MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
68 MortonCopy<true, PixelFormat::G8R8U>,
69 MortonCopy<true, PixelFormat::G8R8S>,
70 MortonCopy<true, PixelFormat::BGRA8>,
71 MortonCopy<true, PixelFormat::RGBA32F>,
72 MortonCopy<true, PixelFormat::RG32F>,
73 MortonCopy<true, PixelFormat::R32F>,
74 MortonCopy<true, PixelFormat::R16F>,
75 MortonCopy<true, PixelFormat::R16U>,
76 MortonCopy<true, PixelFormat::R16S>,
77 MortonCopy<true, PixelFormat::R16UI>,
78 MortonCopy<true, PixelFormat::R16I>,
79 MortonCopy<true, PixelFormat::RG16>,
80 MortonCopy<true, PixelFormat::RG16F>,
81 MortonCopy<true, PixelFormat::RG16UI>,
82 MortonCopy<true, PixelFormat::RG16I>,
83 MortonCopy<true, PixelFormat::RG16S>,
84 MortonCopy<true, PixelFormat::RGB32F>,
85 MortonCopy<true, PixelFormat::RGBA8_SRGB>,
86 MortonCopy<true, PixelFormat::RG8U>,
87 MortonCopy<true, PixelFormat::RG8S>,
88 MortonCopy<true, PixelFormat::RG32UI>,
89 MortonCopy<true, PixelFormat::R32UI>,
90 MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
91 MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
92 MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
93 MortonCopy<true, PixelFormat::BGRA8_SRGB>,
94 MortonCopy<true, PixelFormat::DXT1_SRGB>,
95 MortonCopy<true, PixelFormat::DXT23_SRGB>,
96 MortonCopy<true, PixelFormat::DXT45_SRGB>,
97 MortonCopy<true, PixelFormat::BC7U_SRGB>,
98 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
99 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
100 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
101 MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
102 MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
103 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
104 MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
105 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
106 MortonCopy<true, PixelFormat::Z32F>,
107 MortonCopy<true, PixelFormat::Z16>,
108 MortonCopy<true, PixelFormat::Z24S8>,
109 MortonCopy<true, PixelFormat::S8Z24>,
110 MortonCopy<true, PixelFormat::Z32FS8>,
111 // clang-format on
112};
113
114static constexpr ConversionArray linear_to_morton_fns = {
115 // clang-format off
116 MortonCopy<false, PixelFormat::ABGR8U>,
117 MortonCopy<false, PixelFormat::ABGR8S>,
118 MortonCopy<false, PixelFormat::ABGR8UI>,
119 MortonCopy<false, PixelFormat::B5G6R5U>,
120 MortonCopy<false, PixelFormat::A2B10G10R10U>,
121 MortonCopy<false, PixelFormat::A1B5G5R5U>,
122 MortonCopy<false, PixelFormat::R8U>,
123 MortonCopy<false, PixelFormat::R8UI>,
124 MortonCopy<false, PixelFormat::RGBA16F>,
125 MortonCopy<false, PixelFormat::RGBA16U>,
126 MortonCopy<false, PixelFormat::RGBA16UI>,
127 MortonCopy<false, PixelFormat::R11FG11FB10F>,
128 MortonCopy<false, PixelFormat::RGBA32UI>,
129 MortonCopy<false, PixelFormat::DXT1>,
130 MortonCopy<false, PixelFormat::DXT23>,
131 MortonCopy<false, PixelFormat::DXT45>,
132 MortonCopy<false, PixelFormat::DXN1>,
133 MortonCopy<false, PixelFormat::DXN2UNORM>,
134 MortonCopy<false, PixelFormat::DXN2SNORM>,
135 MortonCopy<false, PixelFormat::BC7U>,
136 MortonCopy<false, PixelFormat::BC6H_UF16>,
137 MortonCopy<false, PixelFormat::BC6H_SF16>,
138 // TODO(Subv): Swizzling ASTC formats are not supported
139 nullptr,
140 MortonCopy<false, PixelFormat::G8R8U>,
141 MortonCopy<false, PixelFormat::G8R8S>,
142 MortonCopy<false, PixelFormat::BGRA8>,
143 MortonCopy<false, PixelFormat::RGBA32F>,
144 MortonCopy<false, PixelFormat::RG32F>,
145 MortonCopy<false, PixelFormat::R32F>,
146 MortonCopy<false, PixelFormat::R16F>,
147 MortonCopy<false, PixelFormat::R16U>,
148 MortonCopy<false, PixelFormat::R16S>,
149 MortonCopy<false, PixelFormat::R16UI>,
150 MortonCopy<false, PixelFormat::R16I>,
151 MortonCopy<false, PixelFormat::RG16>,
152 MortonCopy<false, PixelFormat::RG16F>,
153 MortonCopy<false, PixelFormat::RG16UI>,
154 MortonCopy<false, PixelFormat::RG16I>,
155 MortonCopy<false, PixelFormat::RG16S>,
156 MortonCopy<false, PixelFormat::RGB32F>,
157 MortonCopy<false, PixelFormat::RGBA8_SRGB>,
158 MortonCopy<false, PixelFormat::RG8U>,
159 MortonCopy<false, PixelFormat::RG8S>,
160 MortonCopy<false, PixelFormat::RG32UI>,
161 MortonCopy<false, PixelFormat::R32UI>,
162 nullptr,
163 nullptr,
164 nullptr,
165 MortonCopy<false, PixelFormat::BGRA8_SRGB>,
166 MortonCopy<false, PixelFormat::DXT1_SRGB>,
167 MortonCopy<false, PixelFormat::DXT23_SRGB>,
168 MortonCopy<false, PixelFormat::DXT45_SRGB>,
169 MortonCopy<false, PixelFormat::BC7U_SRGB>,
170 nullptr,
171 nullptr,
172 nullptr,
173 nullptr,
174 nullptr,
175 nullptr,
176 nullptr,
177 nullptr,
178 MortonCopy<false, PixelFormat::Z32F>,
179 MortonCopy<false, PixelFormat::Z16>,
180 MortonCopy<false, PixelFormat::Z24S8>,
181 MortonCopy<false, PixelFormat::S8Z24>,
182 MortonCopy<false, PixelFormat::Z32FS8>,
183 // clang-format on
184};
185
186constexpr MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
187 switch (mode) {
188 case MortonSwizzleMode::MortonToLinear:
189 return morton_to_linear_fns[static_cast<std::size_t>(format)];
190 case MortonSwizzleMode::LinearToMorton:
191 return linear_to_morton_fns[static_cast<std::size_t>(format)];
192 }
193}
194
195/// 8x8 Z-Order coordinate from 2D coordinates
196static u32 MortonInterleave(u32 x, u32 y) {
197 static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15};
198 static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a};
199 return xlut[x % 8] + ylut[y % 8];
200}
201
202/// Calculates the offset of the position of the pixel in Morton order
203static u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
204 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
205 // of which is composed of four 2x2 subtiles each of which is composed of four texels.
206 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
207 // texels are laid out in a 2x2 subtile like this:
208 // 2 3
209 // 0 1
210 //
211 // The full 8x8 tile has the texels arranged like this:
212 //
213 // 42 43 46 47 58 59 62 63
214 // 40 41 44 45 56 57 60 61
215 // 34 35 38 39 50 51 54 55
216 // 32 33 36 37 48 49 52 53
217 // 10 11 14 15 26 27 30 31
218 // 08 09 12 13 24 25 28 29
219 // 02 03 06 07 18 19 22 23
220 // 00 01 04 05 16 17 20 21
221 //
222 // This pattern is what's called Z-order curve, or Morton order.
223
224 const unsigned int block_height = 8;
225 const unsigned int coarse_x = x & ~7;
226
227 u32 i = MortonInterleave(x, y);
228
229 const unsigned int offset = coarse_x * block_height;
230
231 return (i + offset) * bytes_per_pixel;
232}
233
234static u32 MortonInterleave128(u32 x, u32 y) {
235 // 128x128 Z-Order coordinate from 2D coordinates
236 static constexpr u32 xlut[] = {
237 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042,
238 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809,
239 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000,
240 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043,
241 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a,
242 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001,
243 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048,
244 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b,
245 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002,
246 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049,
247 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840,
248 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003,
249 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a,
250 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841,
251 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008,
252 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b,
253 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842,
254 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009,
255 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800,
256 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843,
257 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a,
258 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801,
259 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848,
260 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b,
261 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802,
262 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849,
263 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040,
264 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803,
265 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a,
266 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041,
267 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808,
268 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b,
269 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042,
270 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809,
271 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b,
272 };
273 static constexpr u32 ylut[] = {
274 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090,
275 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124,
276 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200,
277 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294,
278 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330,
279 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404,
280 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0,
281 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534,
282 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610,
283 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4,
284 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780,
285 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014,
286 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0,
287 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184,
288 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220,
289 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4,
290 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390,
291 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424,
292 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500,
293 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594,
294 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630,
295 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704,
296 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0,
297 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034,
298 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110,
299 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4,
300 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280,
301 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314,
302 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0,
303 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484,
304 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520,
305 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4,
306 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690,
307 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724,
308 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4,
309 };
310 return xlut[x % 128] + ylut[y % 128];
311}
312
313static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
314 // Calculates the offset of the position of the pixel in Morton order
315 // Framebuffer images are split into 128x128 tiles.
316
317 const unsigned int block_height = 128;
318 const unsigned int coarse_x = x & ~127;
319
320 u32 i = MortonInterleave128(x, y);
321
322 const unsigned int offset = coarse_x * block_height;
323
324 return (i + offset) * bytes_per_pixel;
325}
326
327void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
328 u32 block_height, u32 height, u32 block_depth, u32 depth, u8* buffer,
329 std::size_t buffer_size, VAddr addr) {
330
331 GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, buffer,
332 buffer_size, addr);
333}
334
335void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,
336 u8* morton_data, u8* linear_data, bool morton_to_linear) {
337 u8* data_ptrs[2];
338 for (unsigned y = 0; y < height; ++y) {
339 for (unsigned x = 0; x < width; ++x) {
340 const u32 coarse_y = y & ~127;
341 u32 morton_offset =
342 GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
343 u32 gl_pixel_index = (x + y * width) * linear_bytes_per_pixel;
344
345 data_ptrs[morton_to_linear] = morton_data + morton_offset;
346 data_ptrs[!morton_to_linear] = &linear_data[gl_pixel_index];
347
348 memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
349 }
350 }
351}
352
353} // namespace VideoCore \ No newline at end of file
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
new file mode 100644
index 000000000..b9b9eca86
--- /dev/null
+++ b/src/video_core/morton.h
@@ -0,0 +1,21 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "video_core/surface.h"
9
10namespace VideoCore {
11
12enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
13
14void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
15 u32 block_height, u32 height, u32 block_depth, u32 depth, u8* buffer,
16 std::size_t buffer_size, VAddr addr);
17
18void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel,
19 u8* morton_data, u8* linear_data, bool morton_to_linear);
20
21} // namespace VideoCore \ No newline at end of file
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 4f434fc31..d458f77e4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -15,6 +15,7 @@
15#include "core/memory.h" 15#include "core/memory.h"
16#include "core/settings.h" 16#include "core/settings.h"
17#include "video_core/engines/maxwell_3d.h" 17#include "video_core/engines/maxwell_3d.h"
18#include "video_core/morton.h"
18#include "video_core/renderer_opengl/gl_rasterizer.h" 19#include "video_core/renderer_opengl/gl_rasterizer.h"
19#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 20#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
20#include "video_core/renderer_opengl/gl_state.h" 21#include "video_core/renderer_opengl/gl_state.h"
@@ -22,10 +23,11 @@
22#include "video_core/surface.h" 23#include "video_core/surface.h"
23#include "video_core/textures/astc.h" 24#include "video_core/textures/astc.h"
24#include "video_core/textures/decoders.h" 25#include "video_core/textures/decoders.h"
25#include "video_core/utils.h"
26 26
27namespace OpenGL { 27namespace OpenGL {
28 28
29using VideoCore::MortonSwizzle;
30using VideoCore::MortonSwizzleMode;
29using VideoCore::Surface::ComponentTypeFromDepthFormat; 31using VideoCore::Surface::ComponentTypeFromDepthFormat;
30using VideoCore::Surface::ComponentTypeFromRenderTarget; 32using VideoCore::Surface::ComponentTypeFromRenderTarget;
31using VideoCore::Surface::ComponentTypeFromTexture; 33using VideoCore::Surface::ComponentTypeFromTexture;
@@ -370,174 +372,7 @@ MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
370 return {0, actual_height, MipWidth(mip_level), 0}; 372 return {0, actual_height, MipWidth(mip_level), 0};
371} 373}
372 374
373template <bool morton_to_gl, PixelFormat format> 375void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
374void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, u8* gl_buffer,
375 std::size_t gl_buffer_size, VAddr addr) {
376 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
377
378 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
379 // pixel values.
380 const u32 tile_size_x{GetDefaultBlockWidth(format)};
381 const u32 tile_size_y{GetDefaultBlockHeight(format)};
382
383 if (morton_to_gl) {
384 Tegra::Texture::UnswizzleTexture(gl_buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
385 stride, height, depth, block_height, block_depth);
386 } else {
387 Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
388 (height + tile_size_y - 1) / tile_size_y, depth,
389 bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr),
390 gl_buffer, false, block_height, block_depth);
391 }
392}
393
394using GLConversionArray = std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr),
395 VideoCore::Surface::MaxPixelFormat>;
396
397static constexpr GLConversionArray morton_to_gl_fns = {
398 // clang-format off
399 MortonCopy<true, PixelFormat::ABGR8U>,
400 MortonCopy<true, PixelFormat::ABGR8S>,
401 MortonCopy<true, PixelFormat::ABGR8UI>,
402 MortonCopy<true, PixelFormat::B5G6R5U>,
403 MortonCopy<true, PixelFormat::A2B10G10R10U>,
404 MortonCopy<true, PixelFormat::A1B5G5R5U>,
405 MortonCopy<true, PixelFormat::R8U>,
406 MortonCopy<true, PixelFormat::R8UI>,
407 MortonCopy<true, PixelFormat::RGBA16F>,
408 MortonCopy<true, PixelFormat::RGBA16U>,
409 MortonCopy<true, PixelFormat::RGBA16UI>,
410 MortonCopy<true, PixelFormat::R11FG11FB10F>,
411 MortonCopy<true, PixelFormat::RGBA32UI>,
412 MortonCopy<true, PixelFormat::DXT1>,
413 MortonCopy<true, PixelFormat::DXT23>,
414 MortonCopy<true, PixelFormat::DXT45>,
415 MortonCopy<true, PixelFormat::DXN1>,
416 MortonCopy<true, PixelFormat::DXN2UNORM>,
417 MortonCopy<true, PixelFormat::DXN2SNORM>,
418 MortonCopy<true, PixelFormat::BC7U>,
419 MortonCopy<true, PixelFormat::BC6H_UF16>,
420 MortonCopy<true, PixelFormat::BC6H_SF16>,
421 MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
422 MortonCopy<true, PixelFormat::G8R8U>,
423 MortonCopy<true, PixelFormat::G8R8S>,
424 MortonCopy<true, PixelFormat::BGRA8>,
425 MortonCopy<true, PixelFormat::RGBA32F>,
426 MortonCopy<true, PixelFormat::RG32F>,
427 MortonCopy<true, PixelFormat::R32F>,
428 MortonCopy<true, PixelFormat::R16F>,
429 MortonCopy<true, PixelFormat::R16U>,
430 MortonCopy<true, PixelFormat::R16S>,
431 MortonCopy<true, PixelFormat::R16UI>,
432 MortonCopy<true, PixelFormat::R16I>,
433 MortonCopy<true, PixelFormat::RG16>,
434 MortonCopy<true, PixelFormat::RG16F>,
435 MortonCopy<true, PixelFormat::RG16UI>,
436 MortonCopy<true, PixelFormat::RG16I>,
437 MortonCopy<true, PixelFormat::RG16S>,
438 MortonCopy<true, PixelFormat::RGB32F>,
439 MortonCopy<true, PixelFormat::RGBA8_SRGB>,
440 MortonCopy<true, PixelFormat::RG8U>,
441 MortonCopy<true, PixelFormat::RG8S>,
442 MortonCopy<true, PixelFormat::RG32UI>,
443 MortonCopy<true, PixelFormat::R32UI>,
444 MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
445 MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
446 MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
447 MortonCopy<true, PixelFormat::BGRA8_SRGB>,
448 MortonCopy<true, PixelFormat::DXT1_SRGB>,
449 MortonCopy<true, PixelFormat::DXT23_SRGB>,
450 MortonCopy<true, PixelFormat::DXT45_SRGB>,
451 MortonCopy<true, PixelFormat::BC7U_SRGB>,
452 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
453 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
454 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
455 MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
456 MortonCopy<true, PixelFormat::ASTC_2D_5X5>,
457 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
458 MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
459 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
460 MortonCopy<true, PixelFormat::Z32F>,
461 MortonCopy<true, PixelFormat::Z16>,
462 MortonCopy<true, PixelFormat::Z24S8>,
463 MortonCopy<true, PixelFormat::S8Z24>,
464 MortonCopy<true, PixelFormat::Z32FS8>,
465 // clang-format on
466};
467
468static constexpr GLConversionArray gl_to_morton_fns = {
469 // clang-format off
470 MortonCopy<false, PixelFormat::ABGR8U>,
471 MortonCopy<false, PixelFormat::ABGR8S>,
472 MortonCopy<false, PixelFormat::ABGR8UI>,
473 MortonCopy<false, PixelFormat::B5G6R5U>,
474 MortonCopy<false, PixelFormat::A2B10G10R10U>,
475 MortonCopy<false, PixelFormat::A1B5G5R5U>,
476 MortonCopy<false, PixelFormat::R8U>,
477 MortonCopy<false, PixelFormat::R8UI>,
478 MortonCopy<false, PixelFormat::RGBA16F>,
479 MortonCopy<false, PixelFormat::RGBA16U>,
480 MortonCopy<false, PixelFormat::RGBA16UI>,
481 MortonCopy<false, PixelFormat::R11FG11FB10F>,
482 MortonCopy<false, PixelFormat::RGBA32UI>,
483 MortonCopy<false, PixelFormat::DXT1>,
484 MortonCopy<false, PixelFormat::DXT23>,
485 MortonCopy<false, PixelFormat::DXT45>,
486 MortonCopy<false, PixelFormat::DXN1>,
487 MortonCopy<false, PixelFormat::DXN2UNORM>,
488 MortonCopy<false, PixelFormat::DXN2SNORM>,
489 MortonCopy<false, PixelFormat::BC7U>,
490 MortonCopy<false, PixelFormat::BC6H_UF16>,
491 MortonCopy<false, PixelFormat::BC6H_SF16>,
492 // TODO(Subv): Swizzling ASTC formats are not supported
493 nullptr,
494 MortonCopy<false, PixelFormat::G8R8U>,
495 MortonCopy<false, PixelFormat::G8R8S>,
496 MortonCopy<false, PixelFormat::BGRA8>,
497 MortonCopy<false, PixelFormat::RGBA32F>,
498 MortonCopy<false, PixelFormat::RG32F>,
499 MortonCopy<false, PixelFormat::R32F>,
500 MortonCopy<false, PixelFormat::R16F>,
501 MortonCopy<false, PixelFormat::R16U>,
502 MortonCopy<false, PixelFormat::R16S>,
503 MortonCopy<false, PixelFormat::R16UI>,
504 MortonCopy<false, PixelFormat::R16I>,
505 MortonCopy<false, PixelFormat::RG16>,
506 MortonCopy<false, PixelFormat::RG16F>,
507 MortonCopy<false, PixelFormat::RG16UI>,
508 MortonCopy<false, PixelFormat::RG16I>,
509 MortonCopy<false, PixelFormat::RG16S>,
510 MortonCopy<false, PixelFormat::RGB32F>,
511 MortonCopy<false, PixelFormat::RGBA8_SRGB>,
512 MortonCopy<false, PixelFormat::RG8U>,
513 MortonCopy<false, PixelFormat::RG8S>,
514 MortonCopy<false, PixelFormat::RG32UI>,
515 MortonCopy<false, PixelFormat::R32UI>,
516 nullptr,
517 nullptr,
518 nullptr,
519 MortonCopy<false, PixelFormat::BGRA8_SRGB>,
520 MortonCopy<false, PixelFormat::DXT1_SRGB>,
521 MortonCopy<false, PixelFormat::DXT23_SRGB>,
522 MortonCopy<false, PixelFormat::DXT45_SRGB>,
523 MortonCopy<false, PixelFormat::BC7U_SRGB>,
524 nullptr,
525 nullptr,
526 nullptr,
527 nullptr,
528 nullptr,
529 nullptr,
530 nullptr,
531 nullptr,
532 MortonCopy<false, PixelFormat::Z32F>,
533 MortonCopy<false, PixelFormat::Z16>,
534 MortonCopy<false, PixelFormat::Z24S8>,
535 MortonCopy<false, PixelFormat::S8Z24>,
536 MortonCopy<false, PixelFormat::Z32FS8>,
537 // clang-format on
538};
539
540void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params,
541 std::vector<u8>& gl_buffer, u32 mip_level) { 376 std::vector<u8>& gl_buffer, u32 mip_level) {
542 u32 depth = params.MipDepth(mip_level); 377 u32 depth = params.MipDepth(mip_level);
543 if (params.target == SurfaceTarget::Texture2D) { 378 if (params.target == SurfaceTarget::Texture2D) {
@@ -550,19 +385,19 @@ void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params
550 const u64 layer_size = params.LayerMemorySize(); 385 const u64 layer_size = params.LayerMemorySize();
551 const u64 gl_size = params.LayerSizeGL(mip_level); 386 const u64 gl_size = params.LayerSizeGL(mip_level);
552 for (u32 i = 0; i < params.depth; i++) { 387 for (u32 i = 0; i < params.depth; i++) {
553 functions[static_cast<std::size_t>(params.pixel_format)]( 388 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
554 params.MipWidth(mip_level), params.MipBlockHeight(mip_level), 389 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
555 params.MipHeight(mip_level), params.MipBlockDepth(mip_level), 1, 390 params.MipBlockDepth(mip_level), 1, gl_buffer.data() + offset_gl, gl_size,
556 gl_buffer.data() + offset_gl, gl_size, params.addr + offset); 391 params.addr + offset);
557 offset += layer_size; 392 offset += layer_size;
558 offset_gl += gl_size; 393 offset_gl += gl_size;
559 } 394 }
560 } else { 395 } else {
561 const u64 offset = params.GetMipmapLevelOffset(mip_level); 396 const u64 offset = params.GetMipmapLevelOffset(mip_level);
562 functions[static_cast<std::size_t>(params.pixel_format)]( 397 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
563 params.MipWidth(mip_level), params.MipBlockHeight(mip_level), 398 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
564 params.MipHeight(mip_level), params.MipBlockDepth(mip_level), depth, gl_buffer.data(), 399 params.MipBlockDepth(mip_level), depth, gl_buffer.data(), gl_buffer.size(),
565 gl_buffer.size(), params.addr + offset); 400 params.addr + offset);
566 } 401 }
567} 402}
568 403
@@ -996,7 +831,7 @@ void CachedSurface::LoadGLBuffer() {
996 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", 831 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
997 params.block_width, static_cast<u32>(params.target)); 832 params.block_width, static_cast<u32>(params.target));
998 for (u32 i = 0; i < params.max_mip_level; i++) 833 for (u32 i = 0; i < params.max_mip_level; i++)
999 SwizzleFunc(morton_to_gl_fns, params, gl_buffer[i], i); 834 SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i);
1000 } else { 835 } else {
1001 const auto texture_src_data{Memory::GetPointer(params.addr)}; 836 const auto texture_src_data{Memory::GetPointer(params.addr)};
1002 const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl}; 837 const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl};
@@ -1035,7 +870,7 @@ void CachedSurface::FlushGLBuffer() {
1035 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", 870 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
1036 params.block_width, static_cast<u32>(params.target)); 871 params.block_width, static_cast<u32>(params.target));
1037 872
1038 SwizzleFunc(gl_to_morton_fns, params, gl_buffer[0], 0); 873 SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0);
1039 } else { 874 } else {
1040 std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes()); 875 std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes());
1041 } 876 }
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 1492e063a..4fd0d66c5 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -19,9 +19,9 @@
19#include "core/settings.h" 19#include "core/settings.h"
20#include "core/telemetry_session.h" 20#include "core/telemetry_session.h"
21#include "core/tracer/recorder.h" 21#include "core/tracer/recorder.h"
22#include "video_core/morton.h"
22#include "video_core/renderer_opengl/gl_rasterizer.h" 23#include "video_core/renderer_opengl/gl_rasterizer.h"
23#include "video_core/renderer_opengl/renderer_opengl.h" 24#include "video_core/renderer_opengl/renderer_opengl.h"
24#include "video_core/utils.h"
25 25
26namespace OpenGL { 26namespace OpenGL {
27 27
diff --git a/src/video_core/utils.h b/src/video_core/utils.h
deleted file mode 100644
index e0a14d48f..000000000
--- a/src/video_core/utils.h
+++ /dev/null
@@ -1,164 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace VideoCore {
10
11// 8x8 Z-Order coordinate from 2D coordinates
12static inline u32 MortonInterleave(u32 x, u32 y) {
13 static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15};
14 static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a};
15 return xlut[x % 8] + ylut[y % 8];
16}
17
18/**
19 * Calculates the offset of the position of the pixel in Morton order
20 */
21static inline u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) {
22 // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each
23 // of which is composed of four 2x2 subtiles each of which is composed of four texels.
24 // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g.
25 // texels are laid out in a 2x2 subtile like this:
26 // 2 3
27 // 0 1
28 //
29 // The full 8x8 tile has the texels arranged like this:
30 //
31 // 42 43 46 47 58 59 62 63
32 // 40 41 44 45 56 57 60 61
33 // 34 35 38 39 50 51 54 55
34 // 32 33 36 37 48 49 52 53
35 // 10 11 14 15 26 27 30 31
36 // 08 09 12 13 24 25 28 29
37 // 02 03 06 07 18 19 22 23
38 // 00 01 04 05 16 17 20 21
39 //
40 // This pattern is what's called Z-order curve, or Morton order.
41
42 const unsigned int block_height = 8;
43 const unsigned int coarse_x = x & ~7;
44
45 u32 i = VideoCore::MortonInterleave(x, y);
46
47 const unsigned int offset = coarse_x * block_height;
48
49 return (i + offset) * bytes_per_pixel;
50}
51
52static inline u32 MortonInterleave128(u32 x, u32 y) {
53 // 128x128 Z-Order coordinate from 2D coordinates
54 static constexpr u32 xlut[] = {
55 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042,
56 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809,
57 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000,
58 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043,
59 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a,
60 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001,
61 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048,
62 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b,
63 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002,
64 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049,
65 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840,
66 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003,
67 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a,
68 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841,
69 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008,
70 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b,
71 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842,
72 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009,
73 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800,
74 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843,
75 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a,
76 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801,
77 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848,
78 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b,
79 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802,
80 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849,
81 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040,
82 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803,
83 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a,
84 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041,
85 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808,
86 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b,
87 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042,
88 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809,
89 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b,
90 };
91 static constexpr u32 ylut[] = {
92 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090,
93 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124,
94 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200,
95 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294,
96 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330,
97 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404,
98 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0,
99 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534,
100 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610,
101 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4,
102 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780,
103 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014,
104 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0,
105 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184,
106 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220,
107 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4,
108 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390,
109 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424,
110 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500,
111 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594,
112 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630,
113 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704,
114 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0,
115 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034,
116 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110,
117 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4,
118 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280,
119 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314,
120 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0,
121 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484,
122 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520,
123 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4,
124 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690,
125 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724,
126 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4,
127 };
128 return xlut[x % 128] + ylut[y % 128];
129}
130
131static inline u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
132 // Calculates the offset of the position of the pixel in Morton order
133 // Framebuffer images are split into 128x128 tiles.
134
135 const unsigned int block_height = 128;
136 const unsigned int coarse_x = x & ~127;
137
138 u32 i = MortonInterleave128(x, y);
139
140 const unsigned int offset = coarse_x * block_height;
141
142 return (i + offset) * bytes_per_pixel;
143}
144
145static inline void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel,
146 u32 gl_bytes_per_pixel, u8* morton_data, u8* gl_data,
147 bool morton_to_gl) {
148 u8* data_ptrs[2];
149 for (unsigned y = 0; y < height; ++y) {
150 for (unsigned x = 0; x < width; ++x) {
151 const u32 coarse_y = y & ~127;
152 u32 morton_offset =
153 GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
154 u32 gl_pixel_index = (x + y * width) * gl_bytes_per_pixel;
155
156 data_ptrs[morton_to_gl] = morton_data + morton_offset;
157 data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index];
158
159 memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
160 }
161 }
162}
163
164} // namespace VideoCore