diff options
| author | 2018-11-24 23:53:17 -0300 | |
|---|---|---|
| committer | 2018-11-25 00:37:18 -0300 | |
| commit | dad3a6718e942862b2e6b98a9bea3fa3b0fdeb5d (patch) | |
| tree | 9347c330e837eecb8ee39a5b76de7dbdc635ddad /src | |
| parent | Merge pull request #1641 from DarkLordZach/sm-register-unregister (diff) | |
| download | yuzu-dad3a6718e942862b2e6b98a9bea3fa3b0fdeb5d.tar.gz yuzu-dad3a6718e942862b2e6b98a9bea3fa3b0fdeb5d.tar.xz yuzu-dad3a6718e942862b2e6b98a9bea3fa3b0fdeb5d.zip | |
video_core: Move morton functions to their own file
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 3 | ||||
| -rw-r--r-- | src/video_core/morton.cpp | 353 | ||||
| -rw-r--r-- | src/video_core/morton.h | 21 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 193 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/utils.h | 164 |
6 files changed, 391 insertions, 345 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index a780215c1..3f906a517 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -21,6 +21,8 @@ add_library(video_core STATIC | |||
| 21 | macro_interpreter.h | 21 | macro_interpreter.h |
| 22 | memory_manager.cpp | 22 | memory_manager.cpp |
| 23 | memory_manager.h | 23 | memory_manager.h |
| 24 | morton.cpp | ||
| 25 | morton.h | ||
| 24 | rasterizer_cache.cpp | 26 | rasterizer_cache.cpp |
| 25 | rasterizer_cache.h | 27 | rasterizer_cache.h |
| 26 | rasterizer_interface.h | 28 | rasterizer_interface.h |
| @@ -62,7 +64,6 @@ add_library(video_core STATIC | |||
| 62 | textures/decoders.cpp | 64 | textures/decoders.cpp |
| 63 | textures/decoders.h | 65 | textures/decoders.h |
| 64 | textures/texture.h | 66 | textures/texture.h |
| 65 | utils.h | ||
| 66 | video_core.cpp | 67 | video_core.cpp |
| 67 | video_core.h | 68 | video_core.h |
| 68 | ) | 69 | ) |
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp new file mode 100644 index 000000000..3dd0e4754 --- /dev/null +++ b/src/video_core/morton.cpp | |||
| @@ -0,0 +1,353 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <cstring> | ||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "core/memory.h" | ||
| 10 | #include "video_core/morton.h" | ||
| 11 | #include "video_core/surface.h" | ||
| 12 | #include "video_core/textures/decoders.h" | ||
| 13 | |||
| 14 | namespace VideoCore { | ||
| 15 | |||
| 16 | using Surface::GetBytesPerPixel; | ||
| 17 | using Surface::PixelFormat; | ||
| 18 | |||
| 19 | using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr); | ||
| 20 | using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>; | ||
| 21 | |||
| 22 | template <bool morton_to_linear, PixelFormat format> | ||
| 23 | static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, | ||
| 24 | u8* buffer, std::size_t buffer_size, VAddr addr) { | ||
| 25 | constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); | ||
| 26 | |||
| 27 | // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual | ||
| 28 | // pixel values. | ||
| 29 | const u32 tile_size_x{GetDefaultBlockWidth(format)}; | ||
| 30 | const u32 tile_size_y{GetDefaultBlockHeight(format)}; | ||
| 31 | |||
| 32 | if (morton_to_linear) { | ||
| 33 | Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel, | ||
| 34 | stride, height, depth, block_height, block_depth); | ||
| 35 | } else { | ||
| 36 | Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x, | ||
| 37 | (height + tile_size_y - 1) / tile_size_y, depth, | ||
| 38 | bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), | ||
| 39 | buffer, false, block_height, block_depth); | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
| 43 | static constexpr ConversionArray morton_to_linear_fns = { | ||
| 44 | // clang-format off | ||
| 45 | MortonCopy<true, PixelFormat::ABGR8U>, | ||
| 46 | MortonCopy<true, PixelFormat::ABGR8S>, | ||
| 47 | MortonCopy<true, PixelFormat::ABGR8UI>, | ||
| 48 | MortonCopy<true, PixelFormat::B5G6R5U>, | ||
| 49 | MortonCopy<true, PixelFormat::A2B10G10R10U>, | ||
| 50 | MortonCopy<true, PixelFormat::A1B5G5R5U>, | ||
| 51 | MortonCopy<true, PixelFormat::R8U>, | ||
| 52 | MortonCopy<true, PixelFormat::R8UI>, | ||
| 53 | MortonCopy<true, PixelFormat::RGBA16F>, | ||
| 54 | MortonCopy<true, PixelFormat::RGBA16U>, | ||
| 55 | MortonCopy<true, PixelFormat::RGBA16UI>, | ||
| 56 | MortonCopy<true, PixelFormat::R11FG11FB10F>, | ||
| 57 | MortonCopy<true, PixelFormat::RGBA32UI>, | ||
| 58 | MortonCopy<true, PixelFormat::DXT1>, | ||
| 59 | MortonCopy<true, PixelFormat::DXT23>, | ||
| 60 | MortonCopy<true, PixelFormat::DXT45>, | ||
| 61 | MortonCopy<true, PixelFormat::DXN1>, | ||
| 62 | MortonCopy<true, PixelFormat::DXN2UNORM>, | ||
| 63 | MortonCopy<true, PixelFormat::DXN2SNORM>, | ||
| 64 | MortonCopy<true, PixelFormat::BC7U>, | ||
| 65 | MortonCopy<true, PixelFormat::BC6H_UF16>, | ||
| 66 | MortonCopy<true, PixelFormat::BC6H_SF16>, | ||
| 67 | MortonCopy<true, PixelFormat::ASTC_2D_4X4>, | ||
| 68 | MortonCopy<true, PixelFormat::G8R8U>, | ||
| 69 | MortonCopy<true, PixelFormat::G8R8S>, | ||
| 70 | MortonCopy<true, PixelFormat::BGRA8>, | ||
| 71 | MortonCopy<true, PixelFormat::RGBA32F>, | ||
| 72 | MortonCopy<true, PixelFormat::RG32F>, | ||
| 73 | MortonCopy<true, PixelFormat::R32F>, | ||
| 74 | MortonCopy<true, PixelFormat::R16F>, | ||
| 75 | MortonCopy<true, PixelFormat::R16U>, | ||
| 76 | MortonCopy<true, PixelFormat::R16S>, | ||
| 77 | MortonCopy<true, PixelFormat::R16UI>, | ||
| 78 | MortonCopy<true, PixelFormat::R16I>, | ||
| 79 | MortonCopy<true, PixelFormat::RG16>, | ||
| 80 | MortonCopy<true, PixelFormat::RG16F>, | ||
| 81 | MortonCopy<true, PixelFormat::RG16UI>, | ||
| 82 | MortonCopy<true, PixelFormat::RG16I>, | ||
| 83 | MortonCopy<true, PixelFormat::RG16S>, | ||
| 84 | MortonCopy<true, PixelFormat::RGB32F>, | ||
| 85 | MortonCopy<true, PixelFormat::RGBA8_SRGB>, | ||
| 86 | MortonCopy<true, PixelFormat::RG8U>, | ||
| 87 | MortonCopy<true, PixelFormat::RG8S>, | ||
| 88 | MortonCopy<true, PixelFormat::RG32UI>, | ||
| 89 | MortonCopy<true, PixelFormat::R32UI>, | ||
| 90 | MortonCopy<true, PixelFormat::ASTC_2D_8X8>, | ||
| 91 | MortonCopy<true, PixelFormat::ASTC_2D_8X5>, | ||
| 92 | MortonCopy<true, PixelFormat::ASTC_2D_5X4>, | ||
| 93 | MortonCopy<true, PixelFormat::BGRA8_SRGB>, | ||
| 94 | MortonCopy<true, PixelFormat::DXT1_SRGB>, | ||
| 95 | MortonCopy<true, PixelFormat::DXT23_SRGB>, | ||
| 96 | MortonCopy<true, PixelFormat::DXT45_SRGB>, | ||
| 97 | MortonCopy<true, PixelFormat::BC7U_SRGB>, | ||
| 98 | MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>, | ||
| 99 | MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>, | ||
| 100 | MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>, | ||
| 101 | MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>, | ||
| 102 | MortonCopy<true, PixelFormat::ASTC_2D_5X5>, | ||
| 103 | MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>, | ||
| 104 | MortonCopy<true, PixelFormat::ASTC_2D_10X8>, | ||
| 105 | MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>, | ||
| 106 | MortonCopy<true, PixelFormat::Z32F>, | ||
| 107 | MortonCopy<true, PixelFormat::Z16>, | ||
| 108 | MortonCopy<true, PixelFormat::Z24S8>, | ||
| 109 | MortonCopy<true, PixelFormat::S8Z24>, | ||
| 110 | MortonCopy<true, PixelFormat::Z32FS8>, | ||
| 111 | // clang-format on | ||
| 112 | }; | ||
| 113 | |||
| 114 | static constexpr ConversionArray linear_to_morton_fns = { | ||
| 115 | // clang-format off | ||
| 116 | MortonCopy<false, PixelFormat::ABGR8U>, | ||
| 117 | MortonCopy<false, PixelFormat::ABGR8S>, | ||
| 118 | MortonCopy<false, PixelFormat::ABGR8UI>, | ||
| 119 | MortonCopy<false, PixelFormat::B5G6R5U>, | ||
| 120 | MortonCopy<false, PixelFormat::A2B10G10R10U>, | ||
| 121 | MortonCopy<false, PixelFormat::A1B5G5R5U>, | ||
| 122 | MortonCopy<false, PixelFormat::R8U>, | ||
| 123 | MortonCopy<false, PixelFormat::R8UI>, | ||
| 124 | MortonCopy<false, PixelFormat::RGBA16F>, | ||
| 125 | MortonCopy<false, PixelFormat::RGBA16U>, | ||
| 126 | MortonCopy<false, PixelFormat::RGBA16UI>, | ||
| 127 | MortonCopy<false, PixelFormat::R11FG11FB10F>, | ||
| 128 | MortonCopy<false, PixelFormat::RGBA32UI>, | ||
| 129 | MortonCopy<false, PixelFormat::DXT1>, | ||
| 130 | MortonCopy<false, PixelFormat::DXT23>, | ||
| 131 | MortonCopy<false, PixelFormat::DXT45>, | ||
| 132 | MortonCopy<false, PixelFormat::DXN1>, | ||
| 133 | MortonCopy<false, PixelFormat::DXN2UNORM>, | ||
| 134 | MortonCopy<false, PixelFormat::DXN2SNORM>, | ||
| 135 | MortonCopy<false, PixelFormat::BC7U>, | ||
| 136 | MortonCopy<false, PixelFormat::BC6H_UF16>, | ||
| 137 | MortonCopy<false, PixelFormat::BC6H_SF16>, | ||
| 138 | // TODO(Subv): Swizzling ASTC formats are not supported | ||
| 139 | nullptr, | ||
| 140 | MortonCopy<false, PixelFormat::G8R8U>, | ||
| 141 | MortonCopy<false, PixelFormat::G8R8S>, | ||
| 142 | MortonCopy<false, PixelFormat::BGRA8>, | ||
| 143 | MortonCopy<false, PixelFormat::RGBA32F>, | ||
| 144 | MortonCopy<false, PixelFormat::RG32F>, | ||
| 145 | MortonCopy<false, PixelFormat::R32F>, | ||
| 146 | MortonCopy<false, PixelFormat::R16F>, | ||
| 147 | MortonCopy<false, PixelFormat::R16U>, | ||
| 148 | MortonCopy<false, PixelFormat::R16S>, | ||
| 149 | MortonCopy<false, PixelFormat::R16UI>, | ||
| 150 | MortonCopy<false, PixelFormat::R16I>, | ||
| 151 | MortonCopy<false, PixelFormat::RG16>, | ||
| 152 | MortonCopy<false, PixelFormat::RG16F>, | ||
| 153 | MortonCopy<false, PixelFormat::RG16UI>, | ||
| 154 | MortonCopy<false, PixelFormat::RG16I>, | ||
| 155 | MortonCopy<false, PixelFormat::RG16S>, | ||
| 156 | MortonCopy<false, PixelFormat::RGB32F>, | ||
| 157 | MortonCopy<false, PixelFormat::RGBA8_SRGB>, | ||
| 158 | MortonCopy<false, PixelFormat::RG8U>, | ||
| 159 | MortonCopy<false, PixelFormat::RG8S>, | ||
| 160 | MortonCopy<false, PixelFormat::RG32UI>, | ||
| 161 | MortonCopy<false, PixelFormat::R32UI>, | ||
| 162 | nullptr, | ||
| 163 | nullptr, | ||
| 164 | nullptr, | ||
| 165 | MortonCopy<false, PixelFormat::BGRA8_SRGB>, | ||
| 166 | MortonCopy<false, PixelFormat::DXT1_SRGB>, | ||
| 167 | MortonCopy<false, PixelFormat::DXT23_SRGB>, | ||
| 168 | MortonCopy<false, PixelFormat::DXT45_SRGB>, | ||
| 169 | MortonCopy<false, PixelFormat::BC7U_SRGB>, | ||
| 170 | nullptr, | ||
| 171 | nullptr, | ||
| 172 | nullptr, | ||
| 173 | nullptr, | ||
| 174 | nullptr, | ||
| 175 | nullptr, | ||
| 176 | nullptr, | ||
| 177 | nullptr, | ||
| 178 | MortonCopy<false, PixelFormat::Z32F>, | ||
| 179 | MortonCopy<false, PixelFormat::Z16>, | ||
| 180 | MortonCopy<false, PixelFormat::Z24S8>, | ||
| 181 | MortonCopy<false, PixelFormat::S8Z24>, | ||
| 182 | MortonCopy<false, PixelFormat::Z32FS8>, | ||
| 183 | // clang-format on | ||
| 184 | }; | ||
| 185 | |||
| 186 | constexpr MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) { | ||
| 187 | switch (mode) { | ||
| 188 | case MortonSwizzleMode::MortonToLinear: | ||
| 189 | return morton_to_linear_fns[static_cast<std::size_t>(format)]; | ||
| 190 | case MortonSwizzleMode::LinearToMorton: | ||
| 191 | return linear_to_morton_fns[static_cast<std::size_t>(format)]; | ||
| 192 | } | ||
| 193 | } | ||
| 194 | |||
| 195 | /// 8x8 Z-Order coordinate from 2D coordinates | ||
| 196 | static u32 MortonInterleave(u32 x, u32 y) { | ||
| 197 | static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15}; | ||
| 198 | static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a}; | ||
| 199 | return xlut[x % 8] + ylut[y % 8]; | ||
| 200 | } | ||
| 201 | |||
| 202 | /// Calculates the offset of the position of the pixel in Morton order | ||
| 203 | static u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) { | ||
| 204 | // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each | ||
| 205 | // of which is composed of four 2x2 subtiles each of which is composed of four texels. | ||
| 206 | // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. | ||
| 207 | // texels are laid out in a 2x2 subtile like this: | ||
| 208 | // 2 3 | ||
| 209 | // 0 1 | ||
| 210 | // | ||
| 211 | // The full 8x8 tile has the texels arranged like this: | ||
| 212 | // | ||
| 213 | // 42 43 46 47 58 59 62 63 | ||
| 214 | // 40 41 44 45 56 57 60 61 | ||
| 215 | // 34 35 38 39 50 51 54 55 | ||
| 216 | // 32 33 36 37 48 49 52 53 | ||
| 217 | // 10 11 14 15 26 27 30 31 | ||
| 218 | // 08 09 12 13 24 25 28 29 | ||
| 219 | // 02 03 06 07 18 19 22 23 | ||
| 220 | // 00 01 04 05 16 17 20 21 | ||
| 221 | // | ||
| 222 | // This pattern is what's called Z-order curve, or Morton order. | ||
| 223 | |||
| 224 | const unsigned int block_height = 8; | ||
| 225 | const unsigned int coarse_x = x & ~7; | ||
| 226 | |||
| 227 | u32 i = MortonInterleave(x, y); | ||
| 228 | |||
| 229 | const unsigned int offset = coarse_x * block_height; | ||
| 230 | |||
| 231 | return (i + offset) * bytes_per_pixel; | ||
| 232 | } | ||
| 233 | |||
| 234 | static u32 MortonInterleave128(u32 x, u32 y) { | ||
| 235 | // 128x128 Z-Order coordinate from 2D coordinates | ||
| 236 | static constexpr u32 xlut[] = { | ||
| 237 | 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, | ||
| 238 | 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, | ||
| 239 | 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, | ||
| 240 | 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, | ||
| 241 | 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, | ||
| 242 | 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, | ||
| 243 | 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, | ||
| 244 | 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, | ||
| 245 | 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, | ||
| 246 | 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, | ||
| 247 | 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, | ||
| 248 | 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, | ||
| 249 | 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, | ||
| 250 | 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, | ||
| 251 | 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, | ||
| 252 | 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, | ||
| 253 | 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, | ||
| 254 | 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, | ||
| 255 | 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, | ||
| 256 | 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, | ||
| 257 | 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, | ||
| 258 | 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, | ||
| 259 | 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, | ||
| 260 | 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, | ||
| 261 | 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, | ||
| 262 | 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, | ||
| 263 | 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, | ||
| 264 | 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, | ||
| 265 | 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, | ||
| 266 | 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, | ||
| 267 | 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, | ||
| 268 | 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, | ||
| 269 | 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, | ||
| 270 | 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, | ||
| 271 | 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, | ||
| 272 | }; | ||
| 273 | static constexpr u32 ylut[] = { | ||
| 274 | 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, | ||
| 275 | 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, | ||
| 276 | 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, | ||
| 277 | 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, | ||
| 278 | 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, | ||
| 279 | 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, | ||
| 280 | 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, | ||
| 281 | 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, | ||
| 282 | 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, | ||
| 283 | 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, | ||
| 284 | 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, | ||
| 285 | 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, | ||
| 286 | 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, | ||
| 287 | 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, | ||
| 288 | 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, | ||
| 289 | 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, | ||
| 290 | 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, | ||
| 291 | 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, | ||
| 292 | 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, | ||
| 293 | 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, | ||
| 294 | 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, | ||
| 295 | 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, | ||
| 296 | 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, | ||
| 297 | 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, | ||
| 298 | 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, | ||
| 299 | 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, | ||
| 300 | 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, | ||
| 301 | 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, | ||
| 302 | 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, | ||
| 303 | 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, | ||
| 304 | 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, | ||
| 305 | 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, | ||
| 306 | 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, | ||
| 307 | 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, | ||
| 308 | 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, | ||
| 309 | }; | ||
| 310 | return xlut[x % 128] + ylut[y % 128]; | ||
| 311 | } | ||
| 312 | |||
| 313 | static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) { | ||
| 314 | // Calculates the offset of the position of the pixel in Morton order | ||
| 315 | // Framebuffer images are split into 128x128 tiles. | ||
| 316 | |||
| 317 | const unsigned int block_height = 128; | ||
| 318 | const unsigned int coarse_x = x & ~127; | ||
| 319 | |||
| 320 | u32 i = MortonInterleave128(x, y); | ||
| 321 | |||
| 322 | const unsigned int offset = coarse_x * block_height; | ||
| 323 | |||
| 324 | return (i + offset) * bytes_per_pixel; | ||
| 325 | } | ||
| 326 | |||
| 327 | void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, | ||
| 328 | u32 block_height, u32 height, u32 block_depth, u32 depth, u8* buffer, | ||
| 329 | std::size_t buffer_size, VAddr addr) { | ||
| 330 | |||
| 331 | GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, buffer, | ||
| 332 | buffer_size, addr); | ||
| 333 | } | ||
| 334 | |||
| 335 | void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel, | ||
| 336 | u8* morton_data, u8* linear_data, bool morton_to_linear) { | ||
| 337 | u8* data_ptrs[2]; | ||
| 338 | for (unsigned y = 0; y < height; ++y) { | ||
| 339 | for (unsigned x = 0; x < width; ++x) { | ||
| 340 | const u32 coarse_y = y & ~127; | ||
| 341 | u32 morton_offset = | ||
| 342 | GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; | ||
| 343 | u32 gl_pixel_index = (x + y * width) * linear_bytes_per_pixel; | ||
| 344 | |||
| 345 | data_ptrs[morton_to_linear] = morton_data + morton_offset; | ||
| 346 | data_ptrs[!morton_to_linear] = &linear_data[gl_pixel_index]; | ||
| 347 | |||
| 348 | memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); | ||
| 349 | } | ||
| 350 | } | ||
| 351 | } | ||
| 352 | |||
| 353 | } // namespace VideoCore \ No newline at end of file | ||
diff --git a/src/video_core/morton.h b/src/video_core/morton.h new file mode 100644 index 000000000..b9b9eca86 --- /dev/null +++ b/src/video_core/morton.h | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "video_core/surface.h" | ||
| 9 | |||
| 10 | namespace VideoCore { | ||
| 11 | |||
/// Direction of the copy performed by MortonSwizzle.
enum class MortonSwizzleMode {
    MortonToLinear, ///< From the Morton (swizzled) layout to the linear layout.
    LinearToMorton, ///< From the linear layout to the Morton (swizzled) layout.
};
| 13 | |||
| 14 | void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride, | ||
| 15 | u32 block_height, u32 height, u32 block_depth, u32 depth, u8* buffer, | ||
| 16 | std::size_t buffer_size, VAddr addr); | ||
| 17 | |||
| 18 | void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, u32 linear_bytes_per_pixel, | ||
| 19 | u8* morton_data, u8* linear_data, bool morton_to_linear); | ||
| 20 | |||
| 21 | } // namespace VideoCore \ No newline at end of file | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 4f434fc31..d458f77e4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include "core/memory.h" | 15 | #include "core/memory.h" |
| 16 | #include "core/settings.h" | 16 | #include "core/settings.h" |
| 17 | #include "video_core/engines/maxwell_3d.h" | 17 | #include "video_core/engines/maxwell_3d.h" |
| 18 | #include "video_core/morton.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 19 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 19 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | 20 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" |
| 20 | #include "video_core/renderer_opengl/gl_state.h" | 21 | #include "video_core/renderer_opengl/gl_state.h" |
| @@ -22,10 +23,11 @@ | |||
| 22 | #include "video_core/surface.h" | 23 | #include "video_core/surface.h" |
| 23 | #include "video_core/textures/astc.h" | 24 | #include "video_core/textures/astc.h" |
| 24 | #include "video_core/textures/decoders.h" | 25 | #include "video_core/textures/decoders.h" |
| 25 | #include "video_core/utils.h" | ||
| 26 | 26 | ||
| 27 | namespace OpenGL { | 27 | namespace OpenGL { |
| 28 | 28 | ||
| 29 | using VideoCore::MortonSwizzle; | ||
| 30 | using VideoCore::MortonSwizzleMode; | ||
| 29 | using VideoCore::Surface::ComponentTypeFromDepthFormat; | 31 | using VideoCore::Surface::ComponentTypeFromDepthFormat; |
| 30 | using VideoCore::Surface::ComponentTypeFromRenderTarget; | 32 | using VideoCore::Surface::ComponentTypeFromRenderTarget; |
| 31 | using VideoCore::Surface::ComponentTypeFromTexture; | 33 | using VideoCore::Surface::ComponentTypeFromTexture; |
| @@ -370,174 +372,7 @@ MathUtil::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { | |||
| 370 | return {0, actual_height, MipWidth(mip_level), 0}; | 372 | return {0, actual_height, MipWidth(mip_level), 0}; |
| 371 | } | 373 | } |
| 372 | 374 | ||
| 373 | template <bool morton_to_gl, PixelFormat format> | 375 | void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params, |
| 374 | void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, u8* gl_buffer, | ||
| 375 | std::size_t gl_buffer_size, VAddr addr) { | ||
| 376 | constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); | ||
| 377 | |||
| 378 | // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual | ||
| 379 | // pixel values. | ||
| 380 | const u32 tile_size_x{GetDefaultBlockWidth(format)}; | ||
| 381 | const u32 tile_size_y{GetDefaultBlockHeight(format)}; | ||
| 382 | |||
| 383 | if (morton_to_gl) { | ||
| 384 | Tegra::Texture::UnswizzleTexture(gl_buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel, | ||
| 385 | stride, height, depth, block_height, block_depth); | ||
| 386 | } else { | ||
| 387 | Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x, | ||
| 388 | (height + tile_size_y - 1) / tile_size_y, depth, | ||
| 389 | bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), | ||
| 390 | gl_buffer, false, block_height, block_depth); | ||
| 391 | } | ||
| 392 | } | ||
| 393 | |||
| 394 | using GLConversionArray = std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr), | ||
| 395 | VideoCore::Surface::MaxPixelFormat>; | ||
| 396 | |||
| 397 | static constexpr GLConversionArray morton_to_gl_fns = { | ||
| 398 | // clang-format off | ||
| 399 | MortonCopy<true, PixelFormat::ABGR8U>, | ||
| 400 | MortonCopy<true, PixelFormat::ABGR8S>, | ||
| 401 | MortonCopy<true, PixelFormat::ABGR8UI>, | ||
| 402 | MortonCopy<true, PixelFormat::B5G6R5U>, | ||
| 403 | MortonCopy<true, PixelFormat::A2B10G10R10U>, | ||
| 404 | MortonCopy<true, PixelFormat::A1B5G5R5U>, | ||
| 405 | MortonCopy<true, PixelFormat::R8U>, | ||
| 406 | MortonCopy<true, PixelFormat::R8UI>, | ||
| 407 | MortonCopy<true, PixelFormat::RGBA16F>, | ||
| 408 | MortonCopy<true, PixelFormat::RGBA16U>, | ||
| 409 | MortonCopy<true, PixelFormat::RGBA16UI>, | ||
| 410 | MortonCopy<true, PixelFormat::R11FG11FB10F>, | ||
| 411 | MortonCopy<true, PixelFormat::RGBA32UI>, | ||
| 412 | MortonCopy<true, PixelFormat::DXT1>, | ||
| 413 | MortonCopy<true, PixelFormat::DXT23>, | ||
| 414 | MortonCopy<true, PixelFormat::DXT45>, | ||
| 415 | MortonCopy<true, PixelFormat::DXN1>, | ||
| 416 | MortonCopy<true, PixelFormat::DXN2UNORM>, | ||
| 417 | MortonCopy<true, PixelFormat::DXN2SNORM>, | ||
| 418 | MortonCopy<true, PixelFormat::BC7U>, | ||
| 419 | MortonCopy<true, PixelFormat::BC6H_UF16>, | ||
| 420 | MortonCopy<true, PixelFormat::BC6H_SF16>, | ||
| 421 | MortonCopy<true, PixelFormat::ASTC_2D_4X4>, | ||
| 422 | MortonCopy<true, PixelFormat::G8R8U>, | ||
| 423 | MortonCopy<true, PixelFormat::G8R8S>, | ||
| 424 | MortonCopy<true, PixelFormat::BGRA8>, | ||
| 425 | MortonCopy<true, PixelFormat::RGBA32F>, | ||
| 426 | MortonCopy<true, PixelFormat::RG32F>, | ||
| 427 | MortonCopy<true, PixelFormat::R32F>, | ||
| 428 | MortonCopy<true, PixelFormat::R16F>, | ||
| 429 | MortonCopy<true, PixelFormat::R16U>, | ||
| 430 | MortonCopy<true, PixelFormat::R16S>, | ||
| 431 | MortonCopy<true, PixelFormat::R16UI>, | ||
| 432 | MortonCopy<true, PixelFormat::R16I>, | ||
| 433 | MortonCopy<true, PixelFormat::RG16>, | ||
| 434 | MortonCopy<true, PixelFormat::RG16F>, | ||
| 435 | MortonCopy<true, PixelFormat::RG16UI>, | ||
| 436 | MortonCopy<true, PixelFormat::RG16I>, | ||
| 437 | MortonCopy<true, PixelFormat::RG16S>, | ||
| 438 | MortonCopy<true, PixelFormat::RGB32F>, | ||
| 439 | MortonCopy<true, PixelFormat::RGBA8_SRGB>, | ||
| 440 | MortonCopy<true, PixelFormat::RG8U>, | ||
| 441 | MortonCopy<true, PixelFormat::RG8S>, | ||
| 442 | MortonCopy<true, PixelFormat::RG32UI>, | ||
| 443 | MortonCopy<true, PixelFormat::R32UI>, | ||
| 444 | MortonCopy<true, PixelFormat::ASTC_2D_8X8>, | ||
| 445 | MortonCopy<true, PixelFormat::ASTC_2D_8X5>, | ||
| 446 | MortonCopy<true, PixelFormat::ASTC_2D_5X4>, | ||
| 447 | MortonCopy<true, PixelFormat::BGRA8_SRGB>, | ||
| 448 | MortonCopy<true, PixelFormat::DXT1_SRGB>, | ||
| 449 | MortonCopy<true, PixelFormat::DXT23_SRGB>, | ||
| 450 | MortonCopy<true, PixelFormat::DXT45_SRGB>, | ||
| 451 | MortonCopy<true, PixelFormat::BC7U_SRGB>, | ||
| 452 | MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>, | ||
| 453 | MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>, | ||
| 454 | MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>, | ||
| 455 | MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>, | ||
| 456 | MortonCopy<true, PixelFormat::ASTC_2D_5X5>, | ||
| 457 | MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>, | ||
| 458 | MortonCopy<true, PixelFormat::ASTC_2D_10X8>, | ||
| 459 | MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>, | ||
| 460 | MortonCopy<true, PixelFormat::Z32F>, | ||
| 461 | MortonCopy<true, PixelFormat::Z16>, | ||
| 462 | MortonCopy<true, PixelFormat::Z24S8>, | ||
| 463 | MortonCopy<true, PixelFormat::S8Z24>, | ||
| 464 | MortonCopy<true, PixelFormat::Z32FS8>, | ||
| 465 | // clang-format on | ||
| 466 | }; | ||
| 467 | |||
| 468 | static constexpr GLConversionArray gl_to_morton_fns = { | ||
| 469 | // clang-format off | ||
| 470 | MortonCopy<false, PixelFormat::ABGR8U>, | ||
| 471 | MortonCopy<false, PixelFormat::ABGR8S>, | ||
| 472 | MortonCopy<false, PixelFormat::ABGR8UI>, | ||
| 473 | MortonCopy<false, PixelFormat::B5G6R5U>, | ||
| 474 | MortonCopy<false, PixelFormat::A2B10G10R10U>, | ||
| 475 | MortonCopy<false, PixelFormat::A1B5G5R5U>, | ||
| 476 | MortonCopy<false, PixelFormat::R8U>, | ||
| 477 | MortonCopy<false, PixelFormat::R8UI>, | ||
| 478 | MortonCopy<false, PixelFormat::RGBA16F>, | ||
| 479 | MortonCopy<false, PixelFormat::RGBA16U>, | ||
| 480 | MortonCopy<false, PixelFormat::RGBA16UI>, | ||
| 481 | MortonCopy<false, PixelFormat::R11FG11FB10F>, | ||
| 482 | MortonCopy<false, PixelFormat::RGBA32UI>, | ||
| 483 | MortonCopy<false, PixelFormat::DXT1>, | ||
| 484 | MortonCopy<false, PixelFormat::DXT23>, | ||
| 485 | MortonCopy<false, PixelFormat::DXT45>, | ||
| 486 | MortonCopy<false, PixelFormat::DXN1>, | ||
| 487 | MortonCopy<false, PixelFormat::DXN2UNORM>, | ||
| 488 | MortonCopy<false, PixelFormat::DXN2SNORM>, | ||
| 489 | MortonCopy<false, PixelFormat::BC7U>, | ||
| 490 | MortonCopy<false, PixelFormat::BC6H_UF16>, | ||
| 491 | MortonCopy<false, PixelFormat::BC6H_SF16>, | ||
| 492 | // TODO(Subv): Swizzling ASTC formats are not supported | ||
| 493 | nullptr, | ||
| 494 | MortonCopy<false, PixelFormat::G8R8U>, | ||
| 495 | MortonCopy<false, PixelFormat::G8R8S>, | ||
| 496 | MortonCopy<false, PixelFormat::BGRA8>, | ||
| 497 | MortonCopy<false, PixelFormat::RGBA32F>, | ||
| 498 | MortonCopy<false, PixelFormat::RG32F>, | ||
| 499 | MortonCopy<false, PixelFormat::R32F>, | ||
| 500 | MortonCopy<false, PixelFormat::R16F>, | ||
| 501 | MortonCopy<false, PixelFormat::R16U>, | ||
| 502 | MortonCopy<false, PixelFormat::R16S>, | ||
| 503 | MortonCopy<false, PixelFormat::R16UI>, | ||
| 504 | MortonCopy<false, PixelFormat::R16I>, | ||
| 505 | MortonCopy<false, PixelFormat::RG16>, | ||
| 506 | MortonCopy<false, PixelFormat::RG16F>, | ||
| 507 | MortonCopy<false, PixelFormat::RG16UI>, | ||
| 508 | MortonCopy<false, PixelFormat::RG16I>, | ||
| 509 | MortonCopy<false, PixelFormat::RG16S>, | ||
| 510 | MortonCopy<false, PixelFormat::RGB32F>, | ||
| 511 | MortonCopy<false, PixelFormat::RGBA8_SRGB>, | ||
| 512 | MortonCopy<false, PixelFormat::RG8U>, | ||
| 513 | MortonCopy<false, PixelFormat::RG8S>, | ||
| 514 | MortonCopy<false, PixelFormat::RG32UI>, | ||
| 515 | MortonCopy<false, PixelFormat::R32UI>, | ||
| 516 | nullptr, | ||
| 517 | nullptr, | ||
| 518 | nullptr, | ||
| 519 | MortonCopy<false, PixelFormat::BGRA8_SRGB>, | ||
| 520 | MortonCopy<false, PixelFormat::DXT1_SRGB>, | ||
| 521 | MortonCopy<false, PixelFormat::DXT23_SRGB>, | ||
| 522 | MortonCopy<false, PixelFormat::DXT45_SRGB>, | ||
| 523 | MortonCopy<false, PixelFormat::BC7U_SRGB>, | ||
| 524 | nullptr, | ||
| 525 | nullptr, | ||
| 526 | nullptr, | ||
| 527 | nullptr, | ||
| 528 | nullptr, | ||
| 529 | nullptr, | ||
| 530 | nullptr, | ||
| 531 | nullptr, | ||
| 532 | MortonCopy<false, PixelFormat::Z32F>, | ||
| 533 | MortonCopy<false, PixelFormat::Z16>, | ||
| 534 | MortonCopy<false, PixelFormat::Z24S8>, | ||
| 535 | MortonCopy<false, PixelFormat::S8Z24>, | ||
| 536 | MortonCopy<false, PixelFormat::Z32FS8>, | ||
| 537 | // clang-format on | ||
| 538 | }; | ||
| 539 | |||
| 540 | void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params, | ||
| 541 | std::vector<u8>& gl_buffer, u32 mip_level) { | 376 | std::vector<u8>& gl_buffer, u32 mip_level) { |
| 542 | u32 depth = params.MipDepth(mip_level); | 377 | u32 depth = params.MipDepth(mip_level); |
| 543 | if (params.target == SurfaceTarget::Texture2D) { | 378 | if (params.target == SurfaceTarget::Texture2D) { |
| @@ -550,19 +385,19 @@ void SwizzleFunc(const GLConversionArray& functions, const SurfaceParams& params | |||
| 550 | const u64 layer_size = params.LayerMemorySize(); | 385 | const u64 layer_size = params.LayerMemorySize(); |
| 551 | const u64 gl_size = params.LayerSizeGL(mip_level); | 386 | const u64 gl_size = params.LayerSizeGL(mip_level); |
| 552 | for (u32 i = 0; i < params.depth; i++) { | 387 | for (u32 i = 0; i < params.depth; i++) { |
| 553 | functions[static_cast<std::size_t>(params.pixel_format)]( | 388 | MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), |
| 554 | params.MipWidth(mip_level), params.MipBlockHeight(mip_level), | 389 | params.MipBlockHeight(mip_level), params.MipHeight(mip_level), |
| 555 | params.MipHeight(mip_level), params.MipBlockDepth(mip_level), 1, | 390 | params.MipBlockDepth(mip_level), 1, gl_buffer.data() + offset_gl, gl_size, |
| 556 | gl_buffer.data() + offset_gl, gl_size, params.addr + offset); | 391 | params.addr + offset); |
| 557 | offset += layer_size; | 392 | offset += layer_size; |
| 558 | offset_gl += gl_size; | 393 | offset_gl += gl_size; |
| 559 | } | 394 | } |
| 560 | } else { | 395 | } else { |
| 561 | const u64 offset = params.GetMipmapLevelOffset(mip_level); | 396 | const u64 offset = params.GetMipmapLevelOffset(mip_level); |
| 562 | functions[static_cast<std::size_t>(params.pixel_format)]( | 397 | MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), |
| 563 | params.MipWidth(mip_level), params.MipBlockHeight(mip_level), | 398 | params.MipBlockHeight(mip_level), params.MipHeight(mip_level), |
| 564 | params.MipHeight(mip_level), params.MipBlockDepth(mip_level), depth, gl_buffer.data(), | 399 | params.MipBlockDepth(mip_level), depth, gl_buffer.data(), gl_buffer.size(), |
| 565 | gl_buffer.size(), params.addr + offset); | 400 | params.addr + offset); |
| 566 | } | 401 | } |
| 567 | } | 402 | } |
| 568 | 403 | ||
| @@ -996,7 +831,7 @@ void CachedSurface::LoadGLBuffer() { | |||
| 996 | ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", | 831 | ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", |
| 997 | params.block_width, static_cast<u32>(params.target)); | 832 | params.block_width, static_cast<u32>(params.target)); |
| 998 | for (u32 i = 0; i < params.max_mip_level; i++) | 833 | for (u32 i = 0; i < params.max_mip_level; i++) |
| 999 | SwizzleFunc(morton_to_gl_fns, params, gl_buffer[i], i); | 834 | SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i); |
| 1000 | } else { | 835 | } else { |
| 1001 | const auto texture_src_data{Memory::GetPointer(params.addr)}; | 836 | const auto texture_src_data{Memory::GetPointer(params.addr)}; |
| 1002 | const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl}; | 837 | const auto texture_src_data_end{texture_src_data + params.size_in_bytes_gl}; |
| @@ -1035,7 +870,7 @@ void CachedSurface::FlushGLBuffer() { | |||
| 1035 | ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", | 870 | ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", |
| 1036 | params.block_width, static_cast<u32>(params.target)); | 871 | params.block_width, static_cast<u32>(params.target)); |
| 1037 | 872 | ||
| 1038 | SwizzleFunc(gl_to_morton_fns, params, gl_buffer[0], 0); | 873 | SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0); |
| 1039 | } else { | 874 | } else { |
| 1040 | std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes()); | 875 | std::memcpy(Memory::GetPointer(GetAddr()), gl_buffer[0].data(), GetSizeInBytes()); |
| 1041 | } | 876 | } |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 1492e063a..4fd0d66c5 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -19,9 +19,9 @@ | |||
| 19 | #include "core/settings.h" | 19 | #include "core/settings.h" |
| 20 | #include "core/telemetry_session.h" | 20 | #include "core/telemetry_session.h" |
| 21 | #include "core/tracer/recorder.h" | 21 | #include "core/tracer/recorder.h" |
| 22 | #include "video_core/morton.h" | ||
| 22 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 23 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 23 | #include "video_core/renderer_opengl/renderer_opengl.h" | 24 | #include "video_core/renderer_opengl/renderer_opengl.h" |
| 24 | #include "video_core/utils.h" | ||
| 25 | 25 | ||
| 26 | namespace OpenGL { | 26 | namespace OpenGL { |
| 27 | 27 | ||
diff --git a/src/video_core/utils.h b/src/video_core/utils.h deleted file mode 100644 index e0a14d48f..000000000 --- a/src/video_core/utils.h +++ /dev/null | |||
| @@ -1,164 +0,0 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace VideoCore { | ||
| 10 | |||
| 11 | // 8x8 Z-Order coordinate from 2D coordinates | ||
| 12 | static inline u32 MortonInterleave(u32 x, u32 y) { | ||
| 13 | static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15}; | ||
| 14 | static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a}; | ||
| 15 | return xlut[x % 8] + ylut[y % 8]; | ||
| 16 | } | ||
| 17 | |||
| 18 | /** | ||
| 19 | * Calculates the offset of the position of the pixel in Morton order | ||
| 20 | */ | ||
| 21 | static inline u32 GetMortonOffset(u32 x, u32 y, u32 bytes_per_pixel) { | ||
| 22 | // Images are split into 8x8 tiles. Each tile is composed of four 4x4 subtiles each | ||
| 23 | // of which is composed of four 2x2 subtiles each of which is composed of four texels. | ||
| 24 | // Each structure is embedded into the next-bigger one in a diagonal pattern, e.g. | ||
| 25 | // texels are laid out in a 2x2 subtile like this: | ||
| 26 | // 2 3 | ||
| 27 | // 0 1 | ||
| 28 | // | ||
| 29 | // The full 8x8 tile has the texels arranged like this: | ||
| 30 | // | ||
| 31 | // 42 43 46 47 58 59 62 63 | ||
| 32 | // 40 41 44 45 56 57 60 61 | ||
| 33 | // 34 35 38 39 50 51 54 55 | ||
| 34 | // 32 33 36 37 48 49 52 53 | ||
| 35 | // 10 11 14 15 26 27 30 31 | ||
| 36 | // 08 09 12 13 24 25 28 29 | ||
| 37 | // 02 03 06 07 18 19 22 23 | ||
| 38 | // 00 01 04 05 16 17 20 21 | ||
| 39 | // | ||
| 40 | // This pattern is what's called Z-order curve, or Morton order. | ||
| 41 | |||
| 42 | const unsigned int block_height = 8; | ||
| 43 | const unsigned int coarse_x = x & ~7; | ||
| 44 | |||
| 45 | u32 i = VideoCore::MortonInterleave(x, y); | ||
| 46 | |||
| 47 | const unsigned int offset = coarse_x * block_height; | ||
| 48 | |||
| 49 | return (i + offset) * bytes_per_pixel; | ||
| 50 | } | ||
| 51 | |||
| 52 | static inline u32 MortonInterleave128(u32 x, u32 y) { | ||
| 53 | // 128x128 Z-Order coordinate from 2D coordinates | ||
| 54 | static constexpr u32 xlut[] = { | ||
| 55 | 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, | ||
| 56 | 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, | ||
| 57 | 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, | ||
| 58 | 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, | ||
| 59 | 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, | ||
| 60 | 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, | ||
| 61 | 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, | ||
| 62 | 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, | ||
| 63 | 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, | ||
| 64 | 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, | ||
| 65 | 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, | ||
| 66 | 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, | ||
| 67 | 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, | ||
| 68 | 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, | ||
| 69 | 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, | ||
| 70 | 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, | ||
| 71 | 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, | ||
| 72 | 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, | ||
| 73 | 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, | ||
| 74 | 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, | ||
| 75 | 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, | ||
| 76 | 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, | ||
| 77 | 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, | ||
| 78 | 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, | ||
| 79 | 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, | ||
| 80 | 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, | ||
| 81 | 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, | ||
| 82 | 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, | ||
| 83 | 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, | ||
| 84 | 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, | ||
| 85 | 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, | ||
| 86 | 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, | ||
| 87 | 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, | ||
| 88 | 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, | ||
| 89 | 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, | ||
| 90 | }; | ||
| 91 | static constexpr u32 ylut[] = { | ||
| 92 | 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, | ||
| 93 | 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, | ||
| 94 | 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, | ||
| 95 | 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, | ||
| 96 | 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, | ||
| 97 | 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, | ||
| 98 | 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, | ||
| 99 | 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, | ||
| 100 | 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, | ||
| 101 | 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, | ||
| 102 | 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, | ||
| 103 | 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, | ||
| 104 | 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, | ||
| 105 | 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, | ||
| 106 | 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, | ||
| 107 | 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, | ||
| 108 | 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, | ||
| 109 | 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, | ||
| 110 | 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, | ||
| 111 | 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, | ||
| 112 | 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, | ||
| 113 | 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, | ||
| 114 | 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, | ||
| 115 | 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, | ||
| 116 | 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, | ||
| 117 | 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, | ||
| 118 | 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, | ||
| 119 | 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, | ||
| 120 | 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, | ||
| 121 | 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, | ||
| 122 | 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, | ||
| 123 | 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, | ||
| 124 | 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, | ||
| 125 | 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, | ||
| 126 | 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, | ||
| 127 | }; | ||
| 128 | return xlut[x % 128] + ylut[y % 128]; | ||
| 129 | } | ||
| 130 | |||
| 131 | static inline u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) { | ||
| 132 | // Calculates the offset of the position of the pixel in Morton order | ||
| 133 | // Framebuffer images are split into 128x128 tiles. | ||
| 134 | |||
| 135 | const unsigned int block_height = 128; | ||
| 136 | const unsigned int coarse_x = x & ~127; | ||
| 137 | |||
| 138 | u32 i = MortonInterleave128(x, y); | ||
| 139 | |||
| 140 | const unsigned int offset = coarse_x * block_height; | ||
| 141 | |||
| 142 | return (i + offset) * bytes_per_pixel; | ||
| 143 | } | ||
| 144 | |||
| 145 | static inline void MortonCopyPixels128(u32 width, u32 height, u32 bytes_per_pixel, | ||
| 146 | u32 gl_bytes_per_pixel, u8* morton_data, u8* gl_data, | ||
| 147 | bool morton_to_gl) { | ||
| 148 | u8* data_ptrs[2]; | ||
| 149 | for (unsigned y = 0; y < height; ++y) { | ||
| 150 | for (unsigned x = 0; x < width; ++x) { | ||
| 151 | const u32 coarse_y = y & ~127; | ||
| 152 | u32 morton_offset = | ||
| 153 | GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel; | ||
| 154 | u32 gl_pixel_index = (x + y * width) * gl_bytes_per_pixel; | ||
| 155 | |||
| 156 | data_ptrs[morton_to_gl] = morton_data + morton_offset; | ||
| 157 | data_ptrs[!morton_to_gl] = &gl_data[gl_pixel_index]; | ||
| 158 | |||
| 159 | memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); | ||
| 160 | } | ||
| 161 | } | ||
| 162 | } | ||
| 163 | |||
| 164 | } // namespace VideoCore | ||