diff options
Diffstat (limited to '')
29 files changed, 1960 insertions, 173 deletions
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 0184289eb..4ff588851 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt | |||
| @@ -157,6 +157,9 @@ endif() | |||
| 157 | add_library(stb stb/stb_dxt.cpp) | 157 | add_library(stb stb/stb_dxt.cpp) |
| 158 | target_include_directories(stb PUBLIC ./stb) | 158 | target_include_directories(stb PUBLIC ./stb) |
| 159 | 159 | ||
| 160 | add_library(bc_decoder bc_decoder/bc_decoder.cpp) | ||
| 161 | target_include_directories(bc_decoder PUBLIC ./bc_decoder) | ||
| 162 | |||
| 160 | if (ANDROID) | 163 | if (ANDROID) |
| 161 | if (ARCHITECTURE_arm64) | 164 | if (ARCHITECTURE_arm64) |
| 162 | add_subdirectory(libadrenotools) | 165 | add_subdirectory(libadrenotools) |
diff --git a/externals/bc_decoder/bc_decoder.cpp b/externals/bc_decoder/bc_decoder.cpp new file mode 100644 index 000000000..536c44f34 --- /dev/null +++ b/externals/bc_decoder/bc_decoder.cpp | |||
| @@ -0,0 +1,1522 @@ | |||
| 1 | // SPDX-License-Identifier: MPL-2.0 | ||
| 2 | // Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) | ||
| 3 | // Copyright 2019 The SwiftShader Authors. All Rights Reserved. | ||
| 4 | |||
| 5 | // This BCn Decoder is directly derivative of Swiftshader's BCn Decoder found at: https://github.com/google/swiftshader/blob/d070309f7d154d6764cbd514b1a5c8bfcef61d06/src/Device/BC_Decoder.cpp | ||
| 6 | // This file does not follow the Skyline code conventions but has certain Skyline specific code | ||
| 7 | // There are a lot of implicit and narrowing conversions in this file due to this (Warnings are disabled as a result) | ||
| 8 | |||
#include <array>
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <utility>
| 13 | |||
| 14 | namespace { | ||
// Dimensions, in texels, of one compressed block.
constexpr int BlockWidth = 4;
constexpr int BlockHeight = 4;

// An 8-byte colour block: two RGB565 endpoints followed by sixteen 2-bit palette indices.
// All data members are private, so instances are meant to be overlaid on raw block bytes.
struct BC_color {
    // Decodes the block into 32-bit texels.
    //   dst              - address of the block's first (top-left) destination texel
    //   x, y             - texel position of the block inside the image; only used for clipping
    //   dstW, dstH       - image size in texels; texels at or beyond these bounds are not written
    //   dstPitch         - distance in bytes between two destination rows
    //   dstBpp           - distance in bytes between two destination texels (4 bytes are stored per texel)
    //   hasAlphaChannel  - in 3-colour mode, palette entry 3 becomes fully transparent instead of opaque black
    //   hasSeparateAlpha - forces 4-colour mode irrespective of the endpoint ordering
    void decode(uint8_t *dst, size_t x, size_t y, size_t dstW, size_t dstH, size_t dstPitch, size_t dstBpp, bool hasAlphaChannel, bool hasSeparateAlpha) const {
        Color c[4];
        c[0].extract565(c0);
        c[1].extract565(c1);
        if (hasSeparateAlpha || (c0 > c1)) {
            // 4-colour mode: two endpoints plus two colours at 1/3 and 2/3 between them
            c[2] = ((c[0] * 2) + c[1]) / 3;
            c[3] = ((c[1] * 2) + c[0]) / 3;
        } else {
            // 3-colour mode: two endpoints, their midpoint, and a default-constructed fourth entry
            c[2] = (c[0] + c[1]) >> 1;
            if (hasAlphaChannel) {
                c[3].clearAlpha();
            }
        }

        for (size_t j = 0; j < BlockHeight && (y + j) < dstH; j++) {
            size_t dstOffset = j * dstPitch;
            size_t idxOffset = j * BlockWidth; // Indices are stored row-major, BlockWidth per row
            for (size_t i = 0; i < BlockWidth && (x + i) < dstW; i++, idxOffset++, dstOffset += dstBpp) {
                // memcpy rather than a cast-and-store: dst is a byte buffer with no alignment or aliasing guarantees
                const unsigned int texel = c[getIdx(idxOffset)].pack8888();
                memcpy(dst + dstOffset, &texel, sizeof(texel));
            }
        }
    }

  private:
    // Working colour with one int per component so that interpolation has headroom.
    // c[0..2] hold the three 8-bit colour components, c[3] holds alpha already shifted into bits 24-31.
    struct Color {
        Color() {
            c[0] = c[1] = c[2] = 0;
            c[3] = 0xFF000000;
        }

        // Expands a 5:6:5 packed colour to 8 bits per component by replicating the top bits into the low bits
        void extract565(const unsigned int c565) {
            c[0] = ((c565 & 0x0000001F) << 3) | ((c565 & 0x0000001C) >> 2);
            c[1] = ((c565 & 0x000007E0) >> 3) | ((c565 & 0x00000600) >> 9);
            c[2] = ((c565 & 0x0000F800) >> 8) | ((c565 & 0x0000E000) >> 13);
        }

        // Packs the components into one 32-bit value: c[0] in bits 16-23, c[1] in bits 8-15, c[2] in bits 0-7, alpha on top
        unsigned int pack8888() const {
            return ((c[0] & 0xFF) << 16) | ((c[1] & 0xFF) << 8) | (c[2] & 0xFF) | c[3];
        }

        void clearAlpha() {
            c[3] = 0;
        }

        Color operator*(int factor) const {
            Color res;
            for (int i = 0; i < 4; ++i) {
                res.c[i] = c[i] * factor;
            }
            return res;
        }

        Color operator/(int factor) const {
            Color res;
            for (int i = 0; i < 4; ++i) {
                res.c[i] = c[i] / factor;
            }
            return res;
        }

        Color operator>>(int shift) const {
            Color res;
            for (int i = 0; i < 4; ++i) {
                res.c[i] = c[i] >> shift;
            }
            return res;
        }

        Color operator+(Color const &obj) const {
            Color res;
            for (int i = 0; i < 4; ++i) {
                res.c[i] = c[i] + obj.c[i];
            }
            return res;
        }

      private:
        int c[4];
    };

    // Returns the palette index (0-3) of texel `i` (0-15, row-major)
    size_t getIdx(size_t i) const {
        const size_t offset = i << 1; // 2 bits per index
        // Shift first, then mask with an unsigned literal so nothing is ever shifted into a sign bit
        return (idx >> offset) & 0x3u;
    }

    unsigned short c0;
    unsigned short c1;
    unsigned int idx;
};
static_assert(sizeof(BC_color) == 8, "BC_color must be 8 bytes");
| 109 | |||
| 110 | struct BC_channel { | ||
| 111 | void decode(uint8_t *dst, size_t x, size_t y, size_t dstW, size_t dstH, size_t dstPitch, size_t dstBpp, size_t channel, bool isSigned) const { | ||
| 112 | int c[8] = {0}; | ||
| 113 | |||
| 114 | if (isSigned) { | ||
| 115 | c[0] = static_cast<signed char>(data & 0xFF); | ||
| 116 | c[1] = static_cast<signed char>((data & 0xFF00) >> 8); | ||
| 117 | } else { | ||
| 118 | c[0] = static_cast<uint8_t>(data & 0xFF); | ||
| 119 | c[1] = static_cast<uint8_t>((data & 0xFF00) >> 8); | ||
| 120 | } | ||
| 121 | |||
| 122 | if (c[0] > c[1]) { | ||
| 123 | for (int i = 2; i < 8; ++i) { | ||
| 124 | c[i] = ((8 - i) * c[0] + (i - 1) * c[1]) / 7; | ||
| 125 | } | ||
| 126 | } else { | ||
| 127 | for (int i = 2; i < 6; ++i) { | ||
| 128 | c[i] = ((6 - i) * c[0] + (i - 1) * c[1]) / 5; | ||
| 129 | } | ||
| 130 | c[6] = isSigned ? -128 : 0; | ||
| 131 | c[7] = isSigned ? 127 : 255; | ||
| 132 | } | ||
| 133 | |||
| 134 | for (size_t j = 0; j < BlockHeight && (y + j) < dstH; j++) { | ||
| 135 | for (size_t i = 0; i < BlockWidth && (x + i) < dstW; i++) { | ||
| 136 | dst[channel + (i * dstBpp) + (j * dstPitch)] = static_cast<uint8_t>(c[getIdx((j * BlockHeight) + i)]); | ||
| 137 | } | ||
| 138 | } | ||
| 139 | } | ||
| 140 | |||
| 141 | private: | ||
| 142 | uint8_t getIdx(int i) const { | ||
| 143 | int offset = i * 3 + 16; | ||
| 144 | return static_cast<uint8_t>((data & (0x7ull << offset)) >> offset); | ||
| 145 | } | ||
| 146 | |||
| 147 | uint64_t data; | ||
| 148 | }; | ||
| 149 | static_assert(sizeof(BC_channel) == 8, "BC_channel must be 8 bytes"); | ||
| 150 | |||
| 151 | struct BC_alpha { | ||
| 152 | void decode(uint8_t *dst, size_t x, size_t y, size_t dstW, size_t dstH, size_t dstPitch, size_t dstBpp) const { | ||
| 153 | dst += 3; // Write only to alpha (channel 3) | ||
| 154 | for (size_t j = 0; j < BlockHeight && (y + j) < dstH; j++, dst += dstPitch) { | ||
| 155 | uint8_t *dstRow = dst; | ||
| 156 | for (size_t i = 0; i < BlockWidth && (x + i) < dstW; i++, dstRow += dstBpp) { | ||
| 157 | *dstRow = getAlpha(j * BlockHeight + i); | ||
| 158 | } | ||
| 159 | } | ||
| 160 | } | ||
| 161 | |||
| 162 | private: | ||
| 163 | uint8_t getAlpha(int i) const { | ||
| 164 | int offset = i << 2; | ||
| 165 | int alpha = (data & (0xFull << offset)) >> offset; | ||
| 166 | return static_cast<uint8_t>(alpha | (alpha << 4)); | ||
| 167 | } | ||
| 168 | |||
| 169 | uint64_t data; | ||
| 170 | }; | ||
| 171 | static_assert(sizeof(BC_alpha) == 8, "BC_alpha must be 8 bytes"); | ||
| 172 | |||
| 173 | namespace BC6H { | ||
// Number of rows in the two-subset partition tables below
static constexpr int MaxPartitions = 64;

// @fmt:off

// Subset (0 or 1) of each of the 16 texels of a block, for every two-subset partition.
// One row per partition; within a row the texels are listed in groups of four.
// NOTE(review): presumably the groups are the block's rows, top to bottom - confirm against the decode loop.
static constexpr uint8_t PartitionTable2[MaxPartitions][16] = {
    {0,0,1,1,  0,0,1,1,  0,0,1,1,  0,0,1,1}, //  0
    {0,0,0,1,  0,0,0,1,  0,0,0,1,  0,0,0,1}, //  1
    {0,1,1,1,  0,1,1,1,  0,1,1,1,  0,1,1,1}, //  2
    {0,0,0,1,  0,0,1,1,  0,0,1,1,  0,1,1,1}, //  3
    {0,0,0,0,  0,0,0,1,  0,0,0,1,  0,0,1,1}, //  4
    {0,0,1,1,  0,1,1,1,  0,1,1,1,  1,1,1,1}, //  5
    {0,0,0,1,  0,0,1,1,  0,1,1,1,  1,1,1,1}, //  6
    {0,0,0,0,  0,0,0,1,  0,0,1,1,  0,1,1,1}, //  7
    {0,0,0,0,  0,0,0,0,  0,0,0,1,  0,0,1,1}, //  8
    {0,0,1,1,  0,1,1,1,  1,1,1,1,  1,1,1,1}, //  9
    {0,0,0,0,  0,0,0,1,  0,1,1,1,  1,1,1,1}, // 10
    {0,0,0,0,  0,0,0,0,  0,0,0,1,  0,1,1,1}, // 11
    {0,0,0,1,  0,1,1,1,  1,1,1,1,  1,1,1,1}, // 12
    {0,0,0,0,  0,0,0,0,  1,1,1,1,  1,1,1,1}, // 13
    {0,0,0,0,  1,1,1,1,  1,1,1,1,  1,1,1,1}, // 14
    {0,0,0,0,  0,0,0,0,  0,0,0,0,  1,1,1,1}, // 15
    {0,0,0,0,  1,0,0,0,  1,1,1,0,  1,1,1,1}, // 16
    {0,1,1,1,  0,0,0,1,  0,0,0,0,  0,0,0,0}, // 17
    {0,0,0,0,  0,0,0,0,  1,0,0,0,  1,1,1,0}, // 18
    {0,1,1,1,  0,0,1,1,  0,0,0,1,  0,0,0,0}, // 19
    {0,0,1,1,  0,0,0,1,  0,0,0,0,  0,0,0,0}, // 20
    {0,0,0,0,  1,0,0,0,  1,1,0,0,  1,1,1,0}, // 21
    {0,0,0,0,  0,0,0,0,  1,0,0,0,  1,1,0,0}, // 22
    {0,1,1,1,  0,0,1,1,  0,0,1,1,  0,0,0,1}, // 23
    {0,0,1,1,  0,0,0,1,  0,0,0,1,  0,0,0,0}, // 24
    {0,0,0,0,  1,0,0,0,  1,0,0,0,  1,1,0,0}, // 25
    {0,1,1,0,  0,1,1,0,  0,1,1,0,  0,1,1,0}, // 26
    {0,0,1,1,  0,1,1,0,  0,1,1,0,  1,1,0,0}, // 27
    {0,0,0,1,  0,1,1,1,  1,1,1,0,  1,0,0,0}, // 28
    {0,0,0,0,  1,1,1,1,  1,1,1,1,  0,0,0,0}, // 29
    {0,1,1,1,  0,0,0,1,  1,0,0,0,  1,1,1,0}, // 30
    {0,0,1,1,  1,0,0,1,  1,0,0,1,  1,1,0,0}, // 31
    {0,1,0,1,  0,1,0,1,  0,1,0,1,  0,1,0,1}, // 32
    {0,0,0,0,  1,1,1,1,  0,0,0,0,  1,1,1,1}, // 33
    {0,1,0,1,  1,0,1,0,  0,1,0,1,  1,0,1,0}, // 34
    {0,0,1,1,  0,0,1,1,  1,1,0,0,  1,1,0,0}, // 35
    {0,0,1,1,  1,1,0,0,  0,0,1,1,  1,1,0,0}, // 36
    {0,1,0,1,  0,1,0,1,  1,0,1,0,  1,0,1,0}, // 37
    {0,1,1,0,  1,0,0,1,  0,1,1,0,  1,0,0,1}, // 38
    {0,1,0,1,  1,0,1,0,  1,0,1,0,  0,1,0,1}, // 39
    {0,1,1,1,  0,0,1,1,  1,1,0,0,  1,1,1,0}, // 40
    {0,0,0,1,  0,0,1,1,  1,1,0,0,  1,0,0,0}, // 41
    {0,0,1,1,  0,0,1,0,  0,1,0,0,  1,1,0,0}, // 42
    {0,0,1,1,  1,0,1,1,  1,1,0,1,  1,1,0,0}, // 43
    {0,1,1,0,  1,0,0,1,  1,0,0,1,  0,1,1,0}, // 44
    {0,0,1,1,  1,1,0,0,  1,1,0,0,  0,0,1,1}, // 45
    {0,1,1,0,  0,1,1,0,  1,0,0,1,  1,0,0,1}, // 46
    {0,0,0,0,  0,1,1,0,  0,1,1,0,  0,0,0,0}, // 47
    {0,1,0,0,  1,1,1,0,  0,1,0,0,  0,0,0,0}, // 48
    {0,0,1,0,  0,1,1,1,  0,0,1,0,  0,0,0,0}, // 49
    {0,0,0,0,  0,0,1,0,  0,1,1,1,  0,0,1,0}, // 50
    {0,0,0,0,  0,1,0,0,  1,1,1,0,  0,1,0,0}, // 51
    {0,1,1,0,  1,1,0,0,  1,0,0,1,  0,0,1,1}, // 52
    {0,0,1,1,  0,1,1,0,  1,1,0,0,  1,0,0,1}, // 53
    {0,1,1,0,  0,0,1,1,  1,0,0,1,  1,1,0,0}, // 54
    {0,0,1,1,  1,0,0,1,  1,1,0,0,  0,1,1,0}, // 55
    {0,1,1,0,  1,1,0,0,  1,1,0,0,  1,0,0,1}, // 56
    {0,1,1,0,  0,0,1,1,  0,0,1,1,  1,0,0,1}, // 57
    {0,1,1,1,  1,1,1,0,  1,0,0,0,  0,0,0,1}, // 58
    {0,0,0,1,  1,0,0,0,  1,1,1,0,  0,1,1,1}, // 59
    {0,0,0,0,  1,1,1,1,  0,0,1,1,  0,0,1,1}, // 60
    {0,0,1,1,  0,0,1,1,  1,1,1,1,  0,0,0,0}, // 61
    {0,0,1,0,  0,0,1,0,  1,1,1,0,  1,1,1,0}, // 62
    {0,1,0,0,  0,1,0,0,  0,1,1,1,  0,1,1,1}, // 63
};

// One texel index (0-15) per partition; in every row the referenced texel belongs to subset 1 of PartitionTable2.
// NOTE(review): presumably the anchor (fix-up) index of the second subset as defined by the BC6H spec - its use is outside this chunk.
static constexpr uint8_t AnchorTable2[MaxPartitions] = {
    15, 15, 15, 15, 15, 15, 15, 15, //  0 -  7
    15, 15, 15, 15, 15, 15, 15, 15, //  8 - 15
    15,  2,  8,  2,  2,  8,  8, 15, // 16 - 23
     2,  8,  2,  2,  8,  8,  2,  2, // 24 - 31
    15, 15,  6,  8,  2,  8, 15, 15, // 32 - 39
     2,  8,  2,  2,  2, 15, 15,  6, // 40 - 47
     6,  2,  6,  8, 15, 15,  2,  2, // 48 - 55
    15, 15, 15, 15, 15,  2,  2, 15, // 56 - 63
};

// @fmt:on
| 257 | |||
// 1.0f in half-precision floating point format
static constexpr uint16_t halfFloat1 = 0x3C00;

// A decoded texel: four 16-bit values, addressable either by name (rgba) or by index (channel).
// Alpha is not stored in the compressed data; every constructor and assignment forces it to 1.0 (halfFloat1).
union Color {
    struct RGBA {
        uint16_t r = 0;
        uint16_t g = 0;
        uint16_t b = 0;
        uint16_t a = halfFloat1;

        RGBA(uint16_t r, uint16_t g, uint16_t b)
            : r(r), g(g), b(b) {
        }

        // Copies the colour components only; alpha is always reset to 1.0
        RGBA &operator=(const RGBA &other) {
            this->r = other.r;
            this->g = other.g;
            this->b = other.b;
            this->a = halfFloat1;

            return *this;
        }
    };

    Color(uint16_t r, uint16_t g, uint16_t b)
        : rgba(r, g, b) {
    }

    // Convenience overload for int arguments; each value is truncated to its low 16 bits
    Color(int r, int g, int b)
        : rgba(static_cast<uint16_t>(r), static_cast<uint16_t>(g), static_cast<uint16_t>(b)) {
    }

    // Starts out as (0, 0, 0, 1.0) so that no member is ever read while indeterminate,
    // even when the object is returned through copy elision and never assigned
    Color()
        : rgba(0, 0, 0) {
    }

    // Copies r, g and b; alpha is set to 1.0 exactly as the assignment operator does
    Color(const Color &other)
        : rgba(other.rgba.r, other.rgba.g, other.rgba.b) {
    }

    Color &operator=(const Color &other) {
        this->rgba = other.rgba;

        return *this;
    }

    RGBA rgba;
    uint16_t channel[4];
};
static_assert(sizeof(Color) == 8, "BC6h::Color must be 8 bytes long");
| 305 | |||
// Sign extends the `size`-bit two's-complement integer held in the low bits of `val`.
//
// Works by flipping the sign bit and then subtracting its weight. For a 2-bit value 0b00AB:
//   A == 1:  0b001B -> flip -> 0b000B -> subtract 0b0010 -> 0b111B (sign bit and everything above it set)
//   A == 0:  0b000B -> flip -> 0b001B -> subtract 0b0010 -> 0b000B (unchanged)
//
// `val` must not have any bit set above bit `size - 1`.
// Widths of 1 to 16 bits are supported. A width of 0 (what a default-constructed RGBf carries) and widths
// above 16 return `val` unchanged: 0 used to shift by an out-of-range amount, and above 16 the former
// 16-bit mask already truncated to zero.
inline int32_t extendSign(int32_t val, size_t size) {
    if (size == 0 || size > 16) {
        return val;
    }

    const int32_t signBit = int32_t{1} << (size - 1);
    return (val ^ signBit) - signBit;
}
| 327 | |||
| 328 | static int constexpr RGBfChannels = 3; | ||
| 329 | struct RGBf { | ||
| 330 | uint16_t channel[RGBfChannels]; | ||
| 331 | size_t size[RGBfChannels]; | ||
| 332 | bool isSigned; | ||
| 333 | |||
| 334 | RGBf() { | ||
| 335 | static_assert(RGBfChannels == 3, "RGBf must have exactly 3 channels"); | ||
| 336 | static_assert(sizeof(channel) / sizeof(channel[0]) == RGBfChannels, "RGBf must have exactly 3 channels"); | ||
| 337 | static_assert(sizeof(channel) / sizeof(channel[0]) == sizeof(size) / sizeof(size[0]), "RGBf requires equally sized arrays for channels and channel sizes"); | ||
| 338 | |||
| 339 | for (int i = 0; i < RGBfChannels; i++) { | ||
| 340 | channel[i] = 0; | ||
| 341 | size[i] = 0; | ||
| 342 | } | ||
| 343 | |||
| 344 | isSigned = false; | ||
| 345 | } | ||
| 346 | |||
| 347 | void extendSign() { | ||
| 348 | for (int i = 0; i < RGBfChannels; i++) { | ||
| 349 | channel[i] = BC6H::extendSign(channel[i], size[i]); | ||
| 350 | } | ||
| 351 | } | ||
| 352 | |||
| 353 | // Assuming this is the delta, take the base-endpoint and transform this into | ||
| 354 | // a proper endpoint. | ||
| 355 | // | ||
| 356 | // The final computed endpoint is truncated to the base-endpoint's size; | ||
| 357 | void resolveDelta(RGBf base) { | ||
| 358 | for (int i = 0; i < RGBfChannels; i++) { | ||
| 359 | size[i] = base.size[i]; | ||
| 360 | channel[i] = (base.channel[i] + channel[i]) & ((1 << base.size[i]) - 1); | ||
| 361 | } | ||
| 362 | |||
| 363 | // Per the spec: | ||
| 364 | // "For signed formats, the results of the delta calculation must be sign | ||
| 365 | // extended as well." | ||
| 366 | if (isSigned) { | ||
| 367 | extendSign(); | ||
| 368 | } | ||
| 369 | } | ||
| 370 | |||
| 371 | void unquantize() { | ||
| 372 | if (isSigned) { | ||
| 373 | unquantizeSigned(); | ||
| 374 | } else { | ||
| 375 | unquantizeUnsigned(); | ||
| 376 | } | ||
| 377 | } | ||
| 378 | |||
| 379 | void unquantizeUnsigned() { | ||
| 380 | for (int i = 0; i < RGBfChannels; i++) { | ||
| 381 | if (size[i] >= 15 || channel[i] == 0) { | ||
| 382 | continue; | ||
| 383 | } else if (channel[i] == ((1u << size[i]) - 1)) { | ||
| 384 | channel[i] = 0xFFFFu; | ||
| 385 | } else { | ||
| 386 | // Need 32 bits to avoid overflow | ||
| 387 | uint32_t tmp = channel[i]; | ||
| 388 | channel[i] = (uint16_t) (((tmp << 16) + 0x8000) >> size[i]); | ||
| 389 | } | ||
| 390 | size[i] = 16; | ||
| 391 | } | ||
| 392 | } | ||
| 393 | |||
| 394 | void unquantizeSigned() { | ||
| 395 | for (int i = 0; i < RGBfChannels; i++) { | ||
| 396 | if (size[i] >= 16 || channel[i] == 0) { | ||
| 397 | continue; | ||
| 398 | } | ||
| 399 | |||
| 400 | int16_t value = (int16_t)channel[i]; | ||
| 401 | int32_t result = value; | ||
| 402 | bool signBit = value < 0; | ||
| 403 | if (signBit) { | ||
| 404 | value = -value; | ||
| 405 | } | ||
| 406 | |||
| 407 | if (value >= ((1 << (size[i] - 1)) - 1)) { | ||
| 408 | result = 0x7FFF; | ||
| 409 | } else { | ||
| 410 | // Need 32 bits to avoid overflow | ||
| 411 | int32_t tmp = value; | ||
| 412 | result = (((tmp << 15) + 0x4000) >> (size[i] - 1)); | ||
| 413 | } | ||
| 414 | |||
| 415 | if (signBit) { | ||
| 416 | result = -result; | ||
| 417 | } | ||
| 418 | |||
| 419 | channel[i] = (uint16_t) result; | ||
| 420 | size[i] = 16; | ||
| 421 | } | ||
| 422 | } | ||
| 423 | }; | ||
| 424 | |||
// The 128 bits of a compressed block, consumed from the least significant bit upwards.
struct Data {
    uint64_t low64;
    uint64_t high64;

    Data() = default;

    Data(uint64_t low64, uint64_t high64)
        : low64(low64), high64(high64) {
    }

    // Consumes the lowest N bits of the 128-bit value formed by high64:low64, where N is:
    //      abs(MSB - LSB) + 1
    // MSB and LSB come from the block description of the BC6h spec and specify
    // the location of the bits in the returned bitstring.
    //
    // If MSB < LSB, then the bits are reversed. Otherwise, the bitstring is read and
    // shifted without further modification.
    //
    // The consumed bits are returned shifted left by min(MSB, LSB); the remaining
    // bits of the block move down by N positions. N must be less than 32.
    uint32_t consumeBits(uint32_t MSB, uint32_t LSB) {
        const bool reversed = MSB < LSB;
        if (reversed) {
            std::swap(MSB, LSB);
        }

        const uint32_t numBits = MSB - LSB + 1;
        assert(numBits < sizeof(uint32_t) * 8);

        // Unsigned literal: a signed 1 must never be shifted towards the sign bit
        const uint32_t mask = (uint32_t{1} << numBits) - 1;
        // Read the low N bits
        uint32_t bits = static_cast<uint32_t>(low64 & mask);

        low64 >>= numBits;
        // Put the low N bits of high64 into the high N bits of low64
        low64 |= (high64 & mask) << (sizeof(high64) * 8 - numBits);
        high64 >>= numBits;

        if (reversed) {
            // Mirror the N-bit field: the first bit read becomes the most significant one
            uint32_t tmp = 0;
            for (uint32_t numSwaps = 0; numSwaps < numBits; numSwaps++) {
                tmp <<= 1;
                tmp |= (bits & 1);
                bits >>= 1;
            }

            bits = tmp;
        }

        return bits << LSB;
    }
};
| 474 | |||
| 475 | struct IndexInfo { | ||
| 476 | uint64_t value; | ||
| 477 | int numBits; | ||
| 478 | }; | ||
| 479 | |||
| 480 | // Interpolates between two endpoints, then does a final unquantization step | ||
| 481 | Color interpolate(RGBf e0, RGBf e1, const IndexInfo &index, bool isSigned) { | ||
| 482 | static constexpr uint32_t weights3[] = {0, 9, 18, 27, 37, 46, 55, 64}; | ||
| 483 | static constexpr uint32_t weights4[] = {0, 4, 9, 13, 17, 21, 26, 30, | ||
| 484 | 34, 38, 43, 47, 51, 55, 60, 64}; | ||
| 485 | static constexpr uint32_t const *weightsN[] = { | ||
| 486 | nullptr, nullptr, nullptr, weights3, weights4 | ||
| 487 | }; | ||
| 488 | auto weights = weightsN[index.numBits]; | ||
| 489 | assert(weights != nullptr); | ||
| 490 | Color color; | ||
| 491 | uint32_t e0Weight = 64 - weights[index.value]; | ||
| 492 | uint32_t e1Weight = weights[index.value]; | ||
| 493 | |||
| 494 | for (int i = 0; i < RGBfChannels; i++) { | ||
| 495 | int32_t e0Channel = e0.channel[i]; | ||
| 496 | int32_t e1Channel = e1.channel[i]; | ||
| 497 | |||
| 498 | if (isSigned) { | ||
| 499 | e0Channel = extendSign(e0Channel, 16); | ||
| 500 | e1Channel = extendSign(e1Channel, 16); | ||
| 501 | } | ||
| 502 | |||
| 503 | int32_t e0Value = e0Channel * e0Weight; | ||
| 504 | int32_t e1Value = e1Channel * e1Weight; | ||
| 505 | |||
| 506 | uint32_t tmp = ((e0Value + e1Value + 32) >> 6); | ||
| 507 | |||
| 508 | // Need to unquantize value to limit it to the legal range of half-precision | ||
| 509 | // floats. We do this by scaling by 31/32 or 31/64 depending on if the value | ||
| 510 | // is signed or unsigned. | ||
| 511 | if (isSigned) { | ||
| 512 | tmp = ((tmp & 0x80000000) != 0) ? (((~tmp + 1) * 31) >> 5) | 0x8000 : (tmp * 31) >> 5; | ||
| 513 | // Don't return -0.0f, just normalize it to 0.0f. | ||
| 514 | if (tmp == 0x8000) | ||
| 515 | tmp = 0; | ||
| 516 | } else { | ||
| 517 | tmp = (tmp * 31) >> 6; | ||
| 518 | } | ||
| 519 | |||
| 520 | color.channel[i] = (uint16_t) tmp; | ||
| 521 | } | ||
| 522 | |||
| 523 | return color; | ||
| 524 | } | ||
| 525 | |||
// What a bitfield of a block description holds.
// The endpoint enumerators double as the endpoint's number (0-3).
enum DataType {
    EP0 = 0,       // Endpoint 0
    EP1 = 1,       // Endpoint 1
    EP2 = 2,       // Endpoint 2
    EP3 = 3,       // Endpoint 3
    Mode = 4,      // The block's mode bits
    Partition = 5, // The partition number
    End = 6,       // Terminates a block description
};
| 536 | |||
// Colour channel a bitfield belongs to.
// R, G and B double as the channel's index (0-2); None is for fields without a channel.
enum Channel {
    R = 0,
    G = 1,
    B = 2,
    None = 3,
};
| 543 | |||
// Bit width of the delta of each colour channel, in r, g, b order
struct DeltaBits {
    size_t channel[3];

    // All widths zero
    constexpr DeltaBits()
        : DeltaBits(0, 0, 0) {
    }

    constexpr DeltaBits(size_t r, size_t g, size_t b)
        : channel{r, g, b} {
    }
};
| 555 | |||
| 556 | struct ModeDesc { | ||
| 557 | int number; | ||
| 558 | bool hasDelta; | ||
| 559 | int partitionCount; | ||
| 560 | int endpointBits; | ||
| 561 | DeltaBits deltaBits; | ||
| 562 | |||
| 563 | constexpr ModeDesc() | ||
| 564 | : number(-1), hasDelta(false), partitionCount(0), endpointBits(0) { | ||
| 565 | } | ||
| 566 | |||
| 567 | constexpr ModeDesc(int number, bool hasDelta, int partitionCount, int endpointBits, DeltaBits deltaBits) | ||
| 568 | : number(number), hasDelta(hasDelta), partitionCount(partitionCount), endpointBits(endpointBits), deltaBits(deltaBits) { | ||
| 569 | } | ||
| 570 | }; | ||
| 571 | |||
| 572 | struct BlockDesc { | ||
| 573 | DataType type; | ||
| 574 | Channel channel; | ||
| 575 | int MSB; | ||
| 576 | int LSB; | ||
| 577 | ModeDesc modeDesc; | ||
| 578 | |||
| 579 | constexpr BlockDesc() | ||
| 580 | : type(End), channel(None), MSB(0), LSB(0), modeDesc() { | ||
| 581 | } | ||
| 582 | |||
| 583 | constexpr BlockDesc(const DataType type, Channel channel, int MSB, int LSB, ModeDesc modeDesc) | ||
| 584 | : type(type), channel(channel), MSB(MSB), LSB(LSB), modeDesc(modeDesc) { | ||
| 585 | } | ||
| 586 | |||
| 587 | constexpr BlockDesc(DataType type, Channel channel, int MSB, int LSB) | ||
| 588 | : type(type), channel(channel), MSB(MSB), LSB(LSB), modeDesc() { | ||
| 589 | } | ||
| 590 | }; | ||
| 591 | |||
// Maps a legal mode number onto its row in the BlockDesc table.
// Every illegal or reserved mode yields -1.
static int modeToIndex(uint8_t mode) {
    switch (mode) {
        // The first four modes map onto themselves
        case 0:
        case 1:
        case 2:
        case 3:
            return mode;

        // The remaining legal modes are packed into consecutive rows
        case 6:
            return 4;
        case 7:
            return 5;
        case 10:
            return 6;
        case 11:
            return 7;
        case 14:
            return 8;
        case 15:
            return 9;
        case 18:
            return 10;
        case 22:
            return 11;
        case 26:
            return 12;
        case 30:
            return 13;

        default:
            return -1;
    }
}
| 609 | |||
// The table that follows describes, for each mode, the bitfields of a block from the LSB
// to the MSB, up to the point where the index data starts.
//
// The numbers come from the BC6h block description. Each BlockDesc has the form
//      {Type, Channel, MSB, LSB}
//   * Type says which endpoint the field belongs to, or that the field is the mode,
//     the partition number, or the end of the block description.
//   * Channel is one of the 3 color channels within an endpoint.
//   * MSB and LSB specify:
//      * The size of the bitfield being read
//      * The position of the bitfield within the variable it is being read to
//      * Whether the bitfield is stored in reverse bit order
//     If MSB < LSB then the bitfield is stored in reverse order. The size of
//     the bitfield is abs(MSB-LSB)+1, and the position of the bitfield within
//     the variable is min(LSB, MSB).
//
// Invalid or reserved modes have no row in the table.

// Number of rows in the table (one per legal mode)
static constexpr int NumBlocks{14};
// Capacity of one row: the largest number of descriptions within a block.
static constexpr int MaxBlockDescIndex{26};
| 630 | static constexpr BlockDesc blockDescs[NumBlocks][MaxBlockDescIndex] = { | ||
| 631 | // @fmt:off | ||
| 632 | // Mode 0, Index 0 | ||
| 633 | { | ||
| 634 | { Mode, None, 1, 0, { 0, true, 2, 10, { 5, 5, 5 } } }, | ||
| 635 | { EP2, G, 4, 4 }, { EP2, B, 4, 4 }, { EP3, B, 4, 4 }, | ||
| 636 | { EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 }, | ||
| 637 | { EP1, R, 4, 0 }, { EP3, G, 4, 4 }, { EP2, G, 3, 0 }, | ||
| 638 | { EP1, G, 4, 0 }, { EP3, B, 0, 0 }, { EP3, G, 3, 0 }, | ||
| 639 | { EP1, B, 4, 0 }, { EP3, B, 1, 1 }, { EP2, B, 3, 0 }, | ||
| 640 | { EP2, R, 4, 0 }, { EP3, B, 2, 2 }, { EP3, R, 4, 0 }, | ||
| 641 | { EP3, B, 3, 3 }, | ||
| 642 | { Partition, None, 4, 0 }, | ||
| 643 | { End, None, 0, 0}, | ||
| 644 | }, | ||
| 645 | // Mode 1, Index 1 | ||
| 646 | { | ||
| 647 | { Mode, None, 1, 0, { 1, true, 2, 7, { 6, 6, 6 } } }, | ||
| 648 | { EP2, G, 5, 5 }, { EP3, G, 5, 4 }, { EP0, R, 6, 0 }, | ||
| 649 | { EP3, B, 1, 0 }, { EP2, B, 4, 4 }, { EP0, G, 6, 0 }, | ||
| 650 | { EP2, B, 5, 5 }, { EP3, B, 2, 2 }, { EP2, G, 4, 4 }, | ||
| 651 | { EP0, B, 6, 0 }, { EP3, B, 3, 3 }, { EP3, B, 5, 5 }, | ||
| 652 | { EP3, B, 4, 4 }, { EP1, R, 5, 0 }, { EP2, G, 3, 0 }, | ||
| 653 | { EP1, G, 5, 0 }, { EP3, G, 3, 0 }, { EP1, B, 5, 0 }, | ||
| 654 | { EP2, B, 3, 0 }, { EP2, R, 5, 0 }, { EP3, R, 5, 0 }, | ||
| 655 | { Partition, None, 4, 0 }, | ||
| 656 | { End, None, 0, 0}, | ||
| 657 | }, | ||
| 658 | // Mode 2, Index 2 | ||
| 659 | { | ||
| 660 | { Mode, None, 4, 0, { 2, true, 2, 11, { 5, 4, 4 } } }, | ||
| 661 | { EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 }, | ||
| 662 | { EP1, R, 4, 0 }, { EP0, R, 10, 10 }, { EP2, G, 3, 0 }, | ||
| 663 | { EP1, G, 3, 0 }, { EP0, G, 10, 10 }, { EP3, B, 0, 0 }, | ||
| 664 | { EP3, G, 3, 0 }, { EP1, B, 3, 0 }, { EP0, B, 10, 10 }, | ||
| 665 | { EP3, B, 1, 1 }, { EP2, B, 3, 0 }, { EP2, R, 4, 0 }, | ||
| 666 | { EP3, B, 2, 2 }, { EP3, R, 4, 0 }, { EP3, B, 3, 3 }, | ||
| 667 | { Partition, None, 4, 0 }, | ||
| 668 | { End, None, 0, 0}, | ||
| 669 | }, | ||
| 670 | // Mode 3, Index 3 | ||
| 671 | { | ||
| 672 | { Mode, None, 4, 0, { 3, false, 1, 10, { 0, 0, 0 } } }, | ||
| 673 | { EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 }, | ||
| 674 | { EP1, R, 9, 0 }, { EP1, G, 9, 0 }, { EP1, B, 9, 0 }, | ||
| 675 | { End, None, 0, 0}, | ||
| 676 | }, | ||
| 677 | // Mode 6, Index 4 | ||
| 678 | { | ||
| 679 | { Mode, None, 4, 0, { 6, true, 2, 11, { 4, 5, 4 } } }, // 1 1 | ||
| 680 | { EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 }, | ||
| 681 | { EP1, R, 3, 0 }, { EP0, R, 10, 10 }, { EP3, G, 4, 4 }, | ||
| 682 | { EP2, G, 3, 0 }, { EP1, G, 4, 0 }, { EP0, G, 10, 10 }, | ||
| 683 | { EP3, G, 3, 0 }, { EP1, B, 3, 0 }, { EP0, B, 10, 10 }, | ||
| 684 | { EP3, B, 1, 1 }, { EP2, B, 3, 0 }, { EP2, R, 3, 0 }, | ||
| 685 | { EP3, B, 0, 0 }, { EP3, B, 2, 2 }, { EP3, R, 3, 0 }, // 18 19 | ||
| 686 | { EP2, G, 4, 4 }, { EP3, B, 3, 3 }, // 2 21 | ||
| 687 | { Partition, None, 4, 0 }, | ||
| 688 | { End, None, 0, 0}, | ||
| 689 | }, | ||
| 690 | // Mode 7, Index 5 | ||
| 691 | { | ||
| 692 | { Mode, None, 4, 0, { 7, true, 1, 11, { 9, 9, 9 } } }, | ||
| 693 | { EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 }, | ||
| 694 | { EP1, R, 8, 0 }, { EP0, R, 10, 10 }, { EP1, G, 8, 0 }, | ||
| 695 | { EP0, G, 10, 10 }, { EP1, B, 8, 0 }, { EP0, B, 10, 10 }, | ||
| 696 | { End, None, 0, 0}, | ||
| 697 | }, | ||
| 698 | // Mode 10, Index 6 | ||
| 699 | { | ||
| 700 | { Mode, None, 4, 0, { 10, true, 2, 11, { 4, 4, 5 } } }, | ||
| 701 | { EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 }, | ||
| 702 | { EP1, R, 3, 0 }, { EP0, R, 10, 10 }, { EP2, B, 4, 4 }, | ||
| 703 | { EP2, G, 3, 0 }, { EP1, G, 3, 0 }, { EP0, G, 10, 10 }, | ||
| 704 | { EP3, B, 0, 0 }, { EP3, G, 3, 0 }, { EP1, B, 4, 0 }, | ||
| 705 | { EP0, B, 10, 10 }, { EP2, B, 3, 0 }, { EP2, R, 3, 0 }, | ||
| 706 | { EP3, B, 1, 1 }, { EP3, B, 2, 2 }, { EP3, R, 3, 0 }, | ||
| 707 | { EP3, B, 4, 4 }, { EP3, B, 3, 3 }, | ||
| 708 | { Partition, None, 4, 0 }, | ||
| 709 | { End, None, 0, 0}, | ||
| 710 | }, | ||
| 711 | // Mode 11, Index 7 | ||
| 712 | { | ||
| 713 | { Mode, None, 4, 0, { 11, true, 1, 12, { 8, 8, 8 } } }, | ||
| 714 | { EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 }, | ||
| 715 | { EP1, R, 7, 0 }, { EP0, R, 10, 11 }, { EP1, G, 7, 0 }, | ||
| 716 | { EP0, G, 10, 11 }, { EP1, B, 7, 0 }, { EP0, B, 10, 11 }, | ||
| 717 | { End, None, 0, 0}, | ||
| 718 | }, | ||
| 719 | // Mode 14, Index 8 | ||
| 720 | { | ||
| 721 | { Mode, None, 4, 0, { 14, true, 2, 9, { 5, 5, 5 } } }, | ||
| 722 | { EP0, R, 8, 0 }, { EP2, B, 4, 4 }, { EP0, G, 8, 0 }, | ||
| 723 | { EP2, G, 4, 4 }, { EP0, B, 8, 0 }, { EP3, B, 4, 4 }, | ||
| 724 | { EP1, R, 4, 0 }, { EP3, G, 4, 4 }, { EP2, G, 3, 0 }, | ||
| 725 | { EP1, G, 4, 0 }, { EP3, B, 0, 0 }, { EP3, G, 3, 0 }, | ||
| 726 | { EP1, B, 4, 0 }, { EP3, B, 1, 1 }, { EP2, B, 3, 0 }, | ||
| 727 | { EP2, R, 4, 0 }, { EP3, B, 2, 2 }, { EP3, R, 4, 0 }, | ||
| 728 | { EP3, B, 3, 3 }, | ||
| 729 | { Partition, None, 4, 0 }, | ||
| 730 | { End, None, 0, 0}, | ||
| 731 | }, | ||
| 732 | // Mode 15, Index 9 | ||
| 733 | { | ||
| 734 | { Mode, None, 4, 0, { 15, true, 1, 16, { 4, 4, 4 } } }, | ||
| 735 | { EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 }, | ||
| 736 | { EP1, R, 3, 0 }, { EP0, R, 10, 15 }, { EP1, G, 3, 0 }, | ||
| 737 | { EP0, G, 10, 15 }, { EP1, B, 3, 0 }, { EP0, B, 10, 15 }, | ||
| 738 | { End, None, 0, 0}, | ||
| 739 | }, | ||
| 740 | // Mode 18, Index 10 | ||
| 741 | { | ||
| 742 | { Mode, None, 4, 0, { 18, true, 2, 8, { 6, 5, 5 } } }, | ||
| 743 | { EP0, R, 7, 0 }, { EP3, G, 4, 4 }, { EP2, B, 4, 4 }, | ||
| 744 | { EP0, G, 7, 0 }, { EP3, B, 2, 2 }, { EP2, G, 4, 4 }, | ||
| 745 | { EP0, B, 7, 0 }, { EP3, B, 3, 3 }, { EP3, B, 4, 4 }, | ||
| 746 | { EP1, R, 5, 0 }, { EP2, G, 3, 0 }, { EP1, G, 4, 0 }, | ||
| 747 | { EP3, B, 0, 0 }, { EP3, G, 3, 0 }, { EP1, B, 4, 0 }, | ||
| 748 | { EP3, B, 1, 1 }, { EP2, B, 3, 0 }, { EP2, R, 5, 0 }, | ||
| 749 | { EP3, R, 5, 0 }, | ||
| 750 | { Partition, None, 4, 0 }, | ||
| 751 | { End, None, 0, 0}, | ||
| 752 | }, | ||
| 753 | // Mode 22, Index 11 | ||
| 754 | { | ||
| 755 | { Mode, None, 4, 0, { 22, true, 2, 8, { 5, 6, 5 } } }, | ||
| 756 | { EP0, R, 7, 0 }, { EP3, B, 0, 0 }, { EP2, B, 4, 4 }, | ||
| 757 | { EP0, G, 7, 0 }, { EP2, G, 5, 5 }, { EP2, G, 4, 4 }, | ||
| 758 | { EP0, B, 7, 0 }, { EP3, G, 5, 5 }, { EP3, B, 4, 4 }, | ||
| 759 | { EP1, R, 4, 0 }, { EP3, G, 4, 4 }, { EP2, G, 3, 0 }, | ||
| 760 | { EP1, G, 5, 0 }, { EP3, G, 3, 0 }, { EP1, B, 4, 0 }, | ||
| 761 | { EP3, B, 1, 1 }, { EP2, B, 3, 0 }, { EP2, R, 4, 0 }, | ||
| 762 | { EP3, B, 2, 2 }, { EP3, R, 4, 0 }, { EP3, B, 3, 3 }, | ||
| 763 | { Partition, None, 4, 0 }, | ||
| 764 | { End, None, 0, 0}, | ||
| 765 | }, | ||
| 766 | // Mode 26, Index 12 | ||
| 767 | { | ||
| 768 | { Mode, None, 4, 0, { 26, true, 2, 8, { 5, 5, 6 } } }, | ||
| 769 | { EP0, R, 7, 0 }, { EP3, B, 1, 1 }, { EP2, B, 4, 4 }, | ||
| 770 | { EP0, G, 7, 0 }, { EP2, B, 5, 5 }, { EP2, G, 4, 4 }, | ||
| 771 | { EP0, B, 7, 0 }, { EP3, B, 5, 5 }, { EP3, B, 4, 4 }, | ||
| 772 | { EP1, R, 4, 0 }, { EP3, G, 4, 4 }, { EP2, G, 3, 0 }, | ||
| 773 | { EP1, G, 4, 0 }, { EP3, B, 0, 0 }, { EP3, G, 3, 0 }, | ||
| 774 | { EP1, B, 5, 0 }, { EP2, B, 3, 0 }, { EP2, R, 4, 0 }, | ||
| 775 | { EP3, B, 2, 2 }, { EP3, R, 4, 0 }, { EP3, B, 3, 3 }, | ||
| 776 | { Partition, None, 4, 0 }, | ||
| 777 | { End, None, 0, 0}, | ||
| 778 | }, | ||
| 779 | // Mode 30, Index 13 | ||
| 780 | { | ||
| 781 | { Mode, None, 4, 0, { 30, false, 2, 6, { 0, 0, 0 } } }, | ||
| 782 | { EP0, R, 5, 0 }, { EP3, G, 4, 4 }, { EP3, B, 0, 0 }, | ||
| 783 | { EP3, B, 1, 1 }, { EP2, B, 4, 4 }, { EP0, G, 5, 0 }, | ||
| 784 | { EP2, G, 5, 5 }, { EP2, B, 5, 5 }, { EP3, B, 2, 2 }, | ||
| 785 | { EP2, G, 4, 4 }, { EP0, B, 5, 0 }, { EP3, G, 5, 5 }, | ||
| 786 | { EP3, B, 3, 3 }, { EP3, B, 5, 5 }, { EP3, B, 4, 4 }, | ||
| 787 | { EP1, R, 5, 0 }, { EP2, G, 3, 0 }, { EP1, G, 5, 0 }, | ||
| 788 | { EP3, G, 3, 0 }, { EP1, B, 5, 0 }, { EP2, B, 3, 0 }, | ||
| 789 | { EP2, R, 5, 0 }, { EP3, R, 5, 0 }, | ||
| 790 | { Partition, None, 4, 0 }, | ||
| 791 | { End, None, 0, 0}, | ||
| 792 | } | ||
| 793 | // @fmt:on | ||
| 794 | }; | ||
| 795 | |||
// One 128-bit BC6H block, stored as two 64-bit halves, plus the routine that expands it to texels.
| 796 | struct Block { | ||
| 797 | uint64_t low64; | ||
| 798 | uint64_t high64; | ||
| 799 | |||
// Decodes this block into at most 4x4 texels.
// - dst: texel (x, y) of the block is written at dst + dstBpp * x + dstPitch * y.
// - dstX, dstY, dstWidth, dstHeight: only used to skip texels that fall outside the image.
// - dstBpp: asserted to equal sizeof(Color).
// - isSigned: copied into every endpoint, selects the sign-extension path and is forwarded to interpolate().
| 800 | void decode(uint8_t *dst, size_t dstX, size_t dstY, size_t dstWidth, size_t dstHeight, size_t dstPitch, size_t dstBpp, bool isSigned) const { | ||
| 801 | uint8_t mode = 0; | ||
| 802 | Data data(low64, high64); | ||
| 803 | assert(dstBpp == sizeof(Color)); | ||
| 804 | |||
// Bit 1 of the block selects the width of the mode field: bits 1..0 when clear, bits 4..0 when set.
| 805 | if ((data.low64 & 0x2) == 0) { | ||
| 806 | mode = data.consumeBits(1, 0); | ||
| 807 | } else { | ||
| 808 | mode = data.consumeBits(4, 0); | ||
| 809 | } | ||
| 810 | |||
| 811 | int blockIndex = modeToIndex(mode); | ||
| 812 | // Handle illegal or reserved mode: fill the visible part of the block with zeros and stop | ||
| 813 | if (blockIndex == -1) { | ||
| 814 | for (int y = 0; y < 4 && y + dstY < dstHeight; y++) { | ||
| 815 | for (int x = 0; x < 4 && x + dstX < dstWidth; x++) { | ||
| 816 | auto out = reinterpret_cast<Color *>(dst + sizeof(Color) * x + dstPitch * y); | ||
| 817 | out->rgba = {0, 0, 0}; | ||
| 818 | } | ||
| 819 | } | ||
| 820 | return; | ||
| 821 | } | ||
| 822 | const BlockDesc *blockDesc = blockDescs[blockIndex]; | ||
| 823 | |||
| 824 | RGBf e[4]; | ||
| 825 | e[0].isSigned = e[1].isSigned = e[2].isSigned = e[3].isSigned = isSigned; | ||
| 826 | |||
| 827 | int partition = 0; | ||
| 828 | ModeDesc modeDesc; | ||
// Walk the descriptor list for this mode until the End marker, consuming bits in the listed order.
| 829 | for (int index = 0; blockDesc[index].type != End; index++) { | ||
| 830 | const BlockDesc desc = blockDesc[index]; | ||
| 831 | |||
| 832 | switch (desc.type) { | ||
| 833 | case Mode: | ||
| 834 | modeDesc = desc.modeDesc; | ||
| 835 | assert(modeDesc.number == mode); | ||
| 836 | |||
// Endpoint 0 always uses the full endpoint width; the others use the delta widths when the mode has deltas.
| 837 | e[0].size[0] = e[0].size[1] = e[0].size[2] = modeDesc.endpointBits; | ||
| 838 | for (int i = 0; i < RGBfChannels; i++) { | ||
| 839 | if (modeDesc.hasDelta) { | ||
| 840 | e[1].size[i] = e[2].size[i] = e[3].size[i] = modeDesc.deltaBits.channel[i]; | ||
| 841 | } else { | ||
| 842 | e[1].size[i] = e[2].size[i] = e[3].size[i] = modeDesc.endpointBits; | ||
| 843 | } | ||
| 844 | } | ||
| 845 | break; | ||
| 846 | case Partition: | ||
| 847 | partition |= data.consumeBits(desc.MSB, desc.LSB); | ||
| 848 | break; | ||
| 849 | case EP0: | ||
| 850 | case EP1: | ||
| 851 | case EP2: | ||
| 852 | case EP3: | ||
| 853 | e[desc.type].channel[desc.channel] |= data.consumeBits(desc.MSB, desc.LSB); | ||
| 854 | break; | ||
| 855 | default: | ||
| 856 | assert(false); | ||
| 857 | return; | ||
| 858 | } | ||
| 859 | } | ||
| 860 | |||
| 861 | // Sign extension | ||
| 862 | if (isSigned) { | ||
| 863 | for (int ep = 0; ep < modeDesc.partitionCount * 2; ep++) { | ||
| 864 | e[ep].extendSign(); | ||
| 865 | } | ||
| 866 | } else if (modeDesc.hasDelta) { | ||
| 867 | // Don't sign-extend the base endpoint in an unsigned format. | ||
| 868 | for (int ep = 1; ep < modeDesc.partitionCount * 2; ep++) { | ||
| 869 | e[ep].extendSign(); | ||
| 870 | } | ||
| 871 | } | ||
| 872 | |||
| 873 | // Turn the deltas into endpoints | ||
| 874 | if (modeDesc.hasDelta) { | ||
| 875 | for (int ep = 1; ep < modeDesc.partitionCount * 2; ep++) { | ||
| 876 | e[ep].resolveDelta(e[0]); | ||
| 877 | } | ||
| 878 | } | ||
| 879 | |||
| 880 | for (int ep = 0; ep < modeDesc.partitionCount * 2; ep++) { | ||
| 881 | e[ep].unquantize(); | ||
| 882 | } | ||
| 883 | |||
| 884 | // Get the indices, calculate final colors, and output | ||
| 885 | for (int y = 0; y < 4; y++) { | ||
| 886 | for (int x = 0; x < 4; x++) { | ||
| 887 | int pixelNum = x + y * 4; | ||
| 888 | IndexInfo idx; | ||
| 889 | bool isAnchor = false; | ||
| 890 | int firstEndpoint = 0; | ||
| 891 | // BC6H can have either 1 or 2 partitions depending on the mode. | ||
| 892 | // The number of partitions affects the number of indices with implicit | ||
| 893 | // leading 0 bits and the number of bits per index. | ||
| 894 | if (modeDesc.partitionCount == 1) { | ||
| 895 | idx.numBits = 4; | ||
| 896 | // There's an implicit leading 0 bit for the first idx | ||
| 897 | isAnchor = (pixelNum == 0); | ||
| 898 | } else { | ||
| 899 | idx.numBits = 3; | ||
| 900 | // There are 2 indices with implicit leading 0-bits. | ||
| 901 | isAnchor = ((pixelNum == 0) || (pixelNum == AnchorTable2[partition])); | ||
| 902 | firstEndpoint = PartitionTable2[partition][pixelNum] * 2; | ||
| 903 | } | ||
| 904 | |||
// Anchor indices are stored with one bit fewer (their top bit is implicitly 0).
| 905 | idx.value = data.consumeBits(idx.numBits - isAnchor - 1, 0); | ||
| 906 | |||
| 907 | // Don't exit the loop early, we need to consume these index bits regardless if | ||
| 908 | // we actually output them or not. | ||
| 909 | if ((y + dstY >= dstHeight) || (x + dstX >= dstWidth)) { | ||
| 910 | continue; | ||
| 911 | } | ||
| 912 | |||
| 913 | Color color = interpolate(e[firstEndpoint], e[firstEndpoint + 1], idx, isSigned); | ||
| 914 | auto out = reinterpret_cast<Color *>(dst + dstBpp * x + dstPitch * y); | ||
| 915 | *out = color; | ||
| 916 | } | ||
| 917 | } | ||
| 918 | } | ||
| 919 | }; | ||
| 920 | |||
| 921 | } // namespace BC6H | ||
| 922 | |||
| 923 | namespace BC7 { | ||
| 924 | // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_bptc.txt | ||
| 925 | // https://docs.microsoft.com/en-us/windows/win32/direct3d11/bc7-format | ||
| 926 | |||
// A contiguous run of `count` bits starting at bit `offset` of a 128-bit block.
struct Bitfield {
    int offset; // Index of the first (least significant) bit of the field
    int count;  // Width of the field in bits; may be 0 for fields a mode does not have

    // Returns the field of width `bits` that starts immediately after this one.
    // Const-qualified so it can also be called on const/constexpr Bitfield objects.
    constexpr Bitfield Then(const int bits) const { return {offset + count, bits}; }

    // Two fields are equal when they cover exactly the same bit range.
    constexpr bool operator==(const Bitfield &rhs) const {
        return offset == rhs.offset && count == rhs.count;
    }
};
| 937 | |||
| 938 | struct Mode { | ||
| 939 | const int IDX; // Mode index | ||
| 940 | const int NS; // Number of subsets in each partition | ||
| 941 | const int PB; // Partition bits | ||
| 942 | const int RB; // Rotation bits | ||
| 943 | const int ISB; // Index selection bits | ||
| 944 | const int CB; // Color bits | ||
| 945 | const int AB; // Alpha bits | ||
| 946 | const int EPB; // Endpoint P-bits | ||
| 947 | const int SPB; // Shared P-bits | ||
| 948 | const int IB; // Primary index bits per element | ||
| 949 | const int IBC; // Primary index bits total | ||
| 950 | const int IB2; // Secondary index bits per element | ||
| 951 | |||
| 952 | constexpr int NumColors() const { return NS * 2; } | ||
| 953 | |||
| 954 | constexpr Bitfield Partition() const { return {IDX + 1, PB}; } | ||
| 955 | |||
| 956 | constexpr Bitfield Rotation() const { return Partition().Then(RB); } | ||
| 957 | |||
| 958 | constexpr Bitfield IndexSelection() const { return Rotation().Then(ISB); } | ||
| 959 | |||
| 960 | constexpr Bitfield Red(int idx) const { | ||
| 961 | return IndexSelection().Then(CB * idx).Then(CB); | ||
| 962 | } | ||
| 963 | |||
| 964 | constexpr Bitfield Green(int idx) const { | ||
| 965 | return Red(NumColors() - 1).Then(CB * idx).Then(CB); | ||
| 966 | } | ||
| 967 | |||
| 968 | constexpr Bitfield Blue(int idx) const { | ||
| 969 | return Green(NumColors() - 1).Then(CB * idx).Then(CB); | ||
| 970 | } | ||
| 971 | |||
| 972 | constexpr Bitfield Alpha(int idx) const { | ||
| 973 | return Blue(NumColors() - 1).Then(AB * idx).Then(AB); | ||
| 974 | } | ||
| 975 | |||
| 976 | constexpr Bitfield EndpointPBit(int idx) const { | ||
| 977 | return Alpha(NumColors() - 1).Then(EPB * idx).Then(EPB); | ||
| 978 | } | ||
| 979 | |||
| 980 | constexpr Bitfield SharedPBit0() const { | ||
| 981 | return EndpointPBit(NumColors() - 1).Then(SPB); | ||
| 982 | } | ||
| 983 | |||
| 984 | constexpr Bitfield SharedPBit1() const { | ||
| 985 | return SharedPBit0().Then(SPB); | ||
| 986 | } | ||
| 987 | |||
| 988 | constexpr Bitfield PrimaryIndex(int offset, int count) const { | ||
| 989 | return SharedPBit1().Then(offset).Then(count); | ||
| 990 | } | ||
| 991 | |||
| 992 | constexpr Bitfield SecondaryIndex(int offset, int count) const { | ||
| 993 | return SharedPBit1().Then(IBC + offset).Then(count); | ||
| 994 | } | ||
| 995 | }; | ||
| 996 | |||
// Layout parameters for each mode; the columns follow the member order of struct Mode.
// The last row (IDX == -1) is the entry Block::mode() returns when no mode bit is set in the low byte.
| 997 | static constexpr Mode Modes[] = { | ||
| 998 | // IDX NS PB RB ISB CB AB EPB SPB IB IBC, IB2 | ||
| 999 | /**/ {0x0, 0x3, 0x4, 0x0, 0x0, 0x4, 0x0, 0x1, 0x0, 0x3, 0x2d, 0x0}, | ||
| 1000 | /**/ {0x1, 0x2, 0x6, 0x0, 0x0, 0x6, 0x0, 0x0, 0x1, 0x3, 0x2e, 0x0}, | ||
| 1001 | /**/ {0x2, 0x3, 0x6, 0x0, 0x0, 0x5, 0x0, 0x0, 0x0, 0x2, 0x1d, 0x0}, | ||
| 1002 | /**/ {0x3, 0x2, 0x6, 0x0, 0x0, 0x7, 0x0, 0x1, 0x0, 0x2, 0x1e, 0x0}, | ||
| 1003 | /**/ {0x4, 0x1, 0x0, 0x2, 0x1, 0x5, 0x6, 0x0, 0x0, 0x2, 0x1f, 0x3}, | ||
| 1004 | /**/ {0x5, 0x1, 0x0, 0x2, 0x0, 0x7, 0x8, 0x0, 0x0, 0x2, 0x1f, 0x2}, | ||
| 1005 | /**/ {0x6, 0x1, 0x0, 0x0, 0x0, 0x7, 0x7, 0x1, 0x0, 0x4, 0x3f, 0x0}, | ||
| 1006 | /**/ {0x7, 0x2, 0x6, 0x0, 0x0, 0x5, 0x5, 0x1, 0x0, 0x2, 0x1e, 0x0}, | ||
| 1007 | /**/ {-1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x00, 0x0}, | ||
| 1008 | }; | ||
| 1009 | |||
// Number of rows in each of the partition and anchor tables below.
| 1010 | static constexpr int MaxPartitions = 64; | ||
// Upper bound on subsets per block; sizes the endpoint array in Block::decode().
| 1011 | static constexpr int MaxSubsets = 3; | ||
| 1012 | |||
// Subset assignment for two-subset modes: PartitionTable2[partition][texel] is the subset (0 or 1)
// of the texel numbered y * 4 + x. Read by Block::subsetIndex() when Mode::NS == 2.
| 1013 | static constexpr uint8_t PartitionTable2[MaxPartitions][16] = { | ||
| 1014 | {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1}, | ||
| 1015 | {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1}, | ||
| 1016 | {0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1}, | ||
| 1017 | {0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1}, | ||
| 1018 | {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1}, | ||
| 1019 | {0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}, | ||
| 1020 | {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}, | ||
| 1021 | {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1}, | ||
| 1022 | {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1}, | ||
| 1023 | {0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, | ||
| 1024 | {0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1}, | ||
| 1025 | {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1}, | ||
| 1026 | {0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, | ||
| 1027 | {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1}, | ||
| 1028 | {0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, | ||
| 1029 | {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1}, | ||
| 1030 | {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1}, | ||
| 1031 | {0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0}, | ||
| 1032 | {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0}, | ||
| 1033 | {0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0}, | ||
| 1034 | {0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0}, | ||
| 1035 | {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0}, | ||
| 1036 | {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, | ||
| 1037 | {0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1}, | ||
| 1038 | {0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, | ||
| 1039 | {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, | ||
| 1040 | {0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0}, | ||
| 1041 | {0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0}, | ||
| 1042 | {0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0}, | ||
| 1043 | {0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0}, | ||
| 1044 | {0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0}, | ||
| 1045 | {0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0}, | ||
| 1046 | {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}, | ||
| 1047 | {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1}, | ||
| 1048 | {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0}, | ||
| 1049 | {0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0}, | ||
| 1050 | {0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0}, | ||
| 1051 | {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0}, | ||
| 1052 | {0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1}, | ||
| 1053 | {0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1}, | ||
| 1054 | {0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0}, | ||
| 1055 | {0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0}, | ||
| 1056 | {0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, | ||
| 1057 | {0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0}, | ||
| 1058 | {0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0}, | ||
| 1059 | {0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1}, | ||
| 1060 | {0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1}, | ||
| 1061 | {0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0}, | ||
| 1062 | {0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, | ||
| 1063 | {0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0}, | ||
| 1064 | {0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0}, | ||
| 1065 | {0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0}, | ||
| 1066 | {0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1}, | ||
| 1067 | {0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1}, | ||
| 1068 | {0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0}, | ||
| 1069 | {0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0}, | ||
| 1070 | {0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1}, | ||
| 1071 | {0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1}, | ||
| 1072 | {0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1}, | ||
| 1073 | {0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1}, | ||
| 1074 | {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1}, | ||
| 1075 | {0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0}, | ||
| 1076 | {0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0}, | ||
| 1077 | {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1}, | ||
| 1078 | }; | ||
| 1079 | |||
// Subset assignment for three-subset modes: PartitionTable3[partition][texel] is the subset (0, 1 or 2)
// of the texel numbered y * 4 + x. Read by Block::subsetIndex() when Mode::NS == 3.
| 1080 | static constexpr uint8_t PartitionTable3[MaxPartitions][16] = { | ||
| 1081 | {0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 1, 2, 2, 2, 2}, | ||
| 1082 | {0, 0, 0, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1}, | ||
| 1083 | {0, 0, 0, 0, 2, 0, 0, 1, 2, 2, 1, 1, 2, 2, 1, 1}, | ||
| 1084 | {0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 1, 0, 1, 1, 1}, | ||
| 1085 | {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2}, | ||
| 1086 | {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 2, 2}, | ||
| 1087 | {0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1}, | ||
| 1088 | {0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1}, | ||
| 1089 | {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2}, | ||
| 1090 | {0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2}, | ||
| 1091 | {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2}, | ||
| 1092 | {0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2}, | ||
| 1093 | {0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2}, | ||
| 1094 | {0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2}, | ||
| 1095 | {0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2}, | ||
| 1096 | {0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0, 2, 2, 2, 0}, | ||
| 1097 | {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2}, | ||
| 1098 | {0, 1, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0}, | ||
| 1099 | {0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2}, | ||
| 1100 | {0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1}, | ||
| 1101 | {0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2}, | ||
| 1102 | {0, 0, 0, 1, 0, 0, 0, 1, 2, 2, 2, 1, 2, 2, 2, 1}, | ||
| 1103 | {0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2}, | ||
| 1104 | {0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 1, 0, 2, 2, 1, 0}, | ||
| 1105 | {0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0}, | ||
| 1106 | {0, 0, 1, 2, 0, 0, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2}, | ||
| 1107 | {0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1, 0, 1, 1, 0}, | ||
| 1108 | {0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1}, | ||
| 1109 | {0, 0, 2, 2, 1, 1, 0, 2, 1, 1, 0, 2, 0, 0, 2, 2}, | ||
| 1110 | {0, 1, 1, 0, 0, 1, 1, 0, 2, 0, 0, 2, 2, 2, 2, 2}, | ||
| 1111 | {0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1}, | ||
| 1112 | {0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 1, 1, 2, 2, 2, 1}, | ||
| 1113 | {0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 2, 2, 2}, | ||
| 1114 | {0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 2, 0, 0, 1, 1}, | ||
| 1115 | {0, 0, 1, 1, 0, 0, 1, 2, 0, 0, 2, 2, 0, 2, 2, 2}, | ||
| 1116 | {0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0}, | ||
| 1117 | {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0}, | ||
| 1118 | {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0}, | ||
| 1119 | {0, 1, 2, 0, 2, 0, 1, 2, 1, 2, 0, 1, 0, 1, 2, 0}, | ||
| 1120 | {0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 2, 2, 0, 0, 1, 1}, | ||
| 1121 | {0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1}, | ||
| 1122 | {0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2}, | ||
| 1123 | {0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1}, | ||
| 1124 | {0, 0, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 2, 2}, | ||
| 1125 | {0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1}, | ||
| 1126 | {0, 2, 2, 0, 1, 2, 2, 1, 0, 2, 2, 0, 1, 2, 2, 1}, | ||
| 1127 | {0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 1}, | ||
| 1128 | {0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1}, | ||
| 1129 | {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2}, | ||
| 1130 | {0, 2, 2, 2, 0, 1, 1, 1, 0, 2, 2, 2, 0, 1, 1, 1}, | ||
| 1131 | {0, 0, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 1, 1, 1, 2}, | ||
| 1132 | {0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2}, | ||
| 1133 | {0, 2, 2, 2, 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2}, | ||
| 1134 | {0, 0, 0, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2}, | ||
| 1135 | {0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2}, | ||
| 1136 | {0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2}, | ||
| 1137 | {0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2}, | ||
| 1138 | {0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2}, | ||
| 1139 | {0, 0, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2}, | ||
| 1140 | {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2}, | ||
| 1141 | {0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1}, | ||
| 1142 | {0, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2}, | ||
| 1143 | {0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, | ||
| 1144 | {0, 1, 1, 1, 2, 0, 1, 1, 2, 2, 0, 1, 2, 2, 2, 0}, | ||
| 1145 | }; | ||
| 1146 | |||
// Per partition: texel number of the anchor of subset 1 in two-subset modes.
// Returned by Block::anchorIndex() for subsetIdx == 1 when Mode::NS == 2.
| 1147 | static constexpr uint8_t AnchorTable2[MaxPartitions] = { | ||
| 1148 | // @fmt:off | ||
| 1149 | 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, | ||
| 1150 | 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, | ||
| 1151 | 0xf, 0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0xf, | ||
| 1152 | 0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0x2, 0x2, | ||
| 1153 | 0xf, 0xf, 0x6, 0x8, 0x2, 0x8, 0xf, 0xf, | ||
| 1154 | 0x2, 0x8, 0x2, 0x2, 0x2, 0xf, 0xf, 0x6, | ||
| 1155 | 0x6, 0x2, 0x6, 0x8, 0xf, 0xf, 0x2, 0x2, | ||
| 1156 | 0xf, 0xf, 0xf, 0xf, 0xf, 0x2, 0x2, 0xf, | ||
| 1157 | // @fmt:on | ||
| 1158 | }; | ||
| 1159 | |||
// Per partition: texel number of the anchor of subset 1 in three-subset modes.
// Returned by Block::anchorIndex() for subsetIdx == 1 when Mode::NS is not 2.
| 1160 | static constexpr uint8_t AnchorTable3a[MaxPartitions] = { | ||
| 1161 | // @fmt:off | ||
| 1162 | 0x3, 0x3, 0xf, 0xf, 0x8, 0x3, 0xf, 0xf, | ||
| 1163 | 0x8, 0x8, 0x6, 0x6, 0x6, 0x5, 0x3, 0x3, | ||
| 1164 | 0x3, 0x3, 0x8, 0xf, 0x3, 0x3, 0x6, 0xa, | ||
| 1165 | 0x5, 0x8, 0x8, 0x6, 0x8, 0x5, 0xf, 0xf, | ||
| 1166 | 0x8, 0xf, 0x3, 0x5, 0x6, 0xa, 0x8, 0xf, | ||
| 1167 | 0xf, 0x3, 0xf, 0x5, 0xf, 0xf, 0xf, 0xf, | ||
| 1168 | 0x3, 0xf, 0x5, 0x5, 0x5, 0x8, 0x5, 0xa, | ||
| 1169 | 0x5, 0xa, 0x8, 0xd, 0xf, 0xc, 0x3, 0x3, | ||
| 1170 | // @fmt:on | ||
| 1171 | }; | ||
| 1172 | |||
// Per partition: texel number of the anchor of subset 2 in three-subset modes.
// Returned by Block::anchorIndex() for subsetIdx == 2.
| 1173 | static constexpr uint8_t AnchorTable3b[MaxPartitions] = { | ||
| 1174 | // @fmt:off | ||
| 1175 | 0xf, 0x8, 0x8, 0x3, 0xf, 0xf, 0x3, 0x8, | ||
| 1176 | 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0x8, | ||
| 1177 | 0xf, 0x8, 0xf, 0x3, 0xf, 0x8, 0xf, 0x8, | ||
| 1178 | 0x3, 0xf, 0x6, 0xa, 0xf, 0xf, 0xa, 0x8, | ||
| 1179 | 0xf, 0x3, 0xf, 0xa, 0xa, 0x8, 0x9, 0xa, | ||
| 1180 | 0x6, 0xf, 0x8, 0xf, 0x3, 0x6, 0x6, 0x8, | ||
| 1181 | 0xf, 0x3, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, | ||
| 1182 | 0xf, 0xf, 0xf, 0xf, 0x3, 0xf, 0xf, 0x8, | ||
| 1183 | // @fmt:on | ||
| 1184 | }; | ||
| 1185 | |||
// A 4-byte texel: three colour bytes (stored in b, g, r member order) followed by alpha.
struct Color {
    // Colour triple with channel-wise helpers. Every helper computes in int and then
    // truncates each channel back to 8 bits through the int constructor.
    struct RGB {
        RGB() = default;

        RGB(uint8_t red, uint8_t green, uint8_t blue)
            : b(blue), g(green), r(red) {}

        // Truncating constructor used by the arithmetic helpers below.
        RGB(int red, int green, int blue)
            : b(static_cast<uint8_t>(blue)),
              g(static_cast<uint8_t>(green)),
              r(static_cast<uint8_t>(red)) {}

        // Shifts every channel left by `shift` bits.
        RGB operator<<(int shift) const {
            return RGB(r << shift, g << shift, b << shift);
        }

        // Shifts every channel right by `shift` bits.
        RGB operator>>(int shift) const {
            return RGB(r >> shift, g >> shift, b >> shift);
        }

        // ORs the same `bits` into every channel.
        RGB operator|(int bits) const {
            return RGB(r | bits, g | bits, b | bits);
        }

        // Channel-wise OR of two colours.
        RGB operator|(const RGB &rhs) const {
            return RGB(r | rhs.r, g | rhs.g, b | rhs.b);
        }

        // Channel-wise sum of two colours (wraps modulo 256).
        RGB operator+(const RGB &rhs) const {
            return RGB(r + rhs.r, g + rhs.g, b + rhs.b);
        }

        uint8_t b;
        uint8_t g;
        uint8_t r;
    };

    RGB rgb;
    uint8_t a;
};

static_assert(sizeof(Color) == 4, "Color size must be 4 bytes");
| 1216 | |||
| 1217 | struct Block { | ||
| 1218 | constexpr uint64_t Get(const Bitfield &bf) const { | ||
| 1219 | uint64_t mask = (1ULL << bf.count) - 1; | ||
| 1220 | if (bf.offset + bf.count <= 64) { | ||
| 1221 | return (low >> bf.offset) & mask; | ||
| 1222 | } | ||
| 1223 | if (bf.offset >= 64) { | ||
| 1224 | return (high >> (bf.offset - 64)) & mask; | ||
| 1225 | } | ||
| 1226 | return ((low >> bf.offset) | (high << (64 - bf.offset))) & mask; | ||
| 1227 | } | ||
| 1228 | |||
| 1229 | const Mode &mode() const { | ||
| 1230 | if ((low & 0b00000001) != 0) { | ||
| 1231 | return Modes[0]; | ||
| 1232 | } | ||
| 1233 | if ((low & 0b00000010) != 0) { | ||
| 1234 | return Modes[1]; | ||
| 1235 | } | ||
| 1236 | if ((low & 0b00000100) != 0) { | ||
| 1237 | return Modes[2]; | ||
| 1238 | } | ||
| 1239 | if ((low & 0b00001000) != 0) { | ||
| 1240 | return Modes[3]; | ||
| 1241 | } | ||
| 1242 | if ((low & 0b00010000) != 0) { | ||
| 1243 | return Modes[4]; | ||
| 1244 | } | ||
| 1245 | if ((low & 0b00100000) != 0) { | ||
| 1246 | return Modes[5]; | ||
| 1247 | } | ||
| 1248 | if ((low & 0b01000000) != 0) { | ||
| 1249 | return Modes[6]; | ||
| 1250 | } | ||
| 1251 | if ((low & 0b10000000) != 0) { | ||
| 1252 | return Modes[7]; | ||
| 1253 | } | ||
| 1254 | return Modes[8]; // Invalid mode | ||
| 1255 | } | ||
| 1256 | |||
| 1257 | struct IndexInfo { | ||
| 1258 | uint64_t value; | ||
| 1259 | int numBits; | ||
| 1260 | }; | ||
| 1261 | |||
| 1262 | uint8_t interpolate(uint8_t e0, uint8_t e1, const IndexInfo &index) const { | ||
| 1263 | static constexpr uint16_t weights2[] = {0, 21, 43, 64}; | ||
| 1264 | static constexpr uint16_t weights3[] = {0, 9, 18, 27, 37, 46, 55, 64}; | ||
| 1265 | static constexpr uint16_t weights4[] = {0, 4, 9, 13, 17, 21, 26, 30, | ||
| 1266 | 34, 38, 43, 47, 51, 55, 60, 64}; | ||
| 1267 | static constexpr uint16_t const *weightsN[] = { | ||
| 1268 | nullptr, nullptr, weights2, weights3, weights4 | ||
| 1269 | }; | ||
| 1270 | auto weights = weightsN[index.numBits]; | ||
| 1271 | assert(weights != nullptr); | ||
| 1272 | return (uint8_t) (((64 - weights[index.value]) * uint16_t(e0) + weights[index.value] * uint16_t(e1) + 32) >> 6); | ||
| 1273 | } | ||
| 1274 | |||
| 1275 | void decode(uint8_t *dst, size_t dstX, size_t dstY, size_t dstWidth, size_t dstHeight, size_t dstPitch) const { | ||
| 1276 | auto const &mode = this->mode(); | ||
| 1277 | |||
| 1278 | if (mode.IDX < 0) // Invalid mode: | ||
| 1279 | { | ||
| 1280 | for (size_t y = 0; y < 4 && y + dstY < dstHeight; y++) { | ||
| 1281 | for (size_t x = 0; x < 4 && x + dstX < dstWidth; x++) { | ||
| 1282 | auto out = reinterpret_cast<Color *>(dst + sizeof(Color) * x + dstPitch * y); | ||
| 1283 | out->rgb = {0, 0, 0}; | ||
| 1284 | out->a = 0; | ||
| 1285 | } | ||
| 1286 | } | ||
| 1287 | return; | ||
| 1288 | } | ||
| 1289 | |||
| 1290 | using Endpoint = std::array<Color, 2>; | ||
| 1291 | std::array<Endpoint, MaxSubsets> subsets; | ||
| 1292 | |||
| 1293 | for (size_t i = 0; i < mode.NS; i++) { | ||
| 1294 | auto &subset = subsets[i]; | ||
| 1295 | subset[0].rgb.r = Get(mode.Red(i * 2 + 0)); | ||
| 1296 | subset[0].rgb.g = Get(mode.Green(i * 2 + 0)); | ||
| 1297 | subset[0].rgb.b = Get(mode.Blue(i * 2 + 0)); | ||
| 1298 | subset[0].a = (mode.AB > 0) ? Get(mode.Alpha(i * 2 + 0)) : 255; | ||
| 1299 | |||
| 1300 | subset[1].rgb.r = Get(mode.Red(i * 2 + 1)); | ||
| 1301 | subset[1].rgb.g = Get(mode.Green(i * 2 + 1)); | ||
| 1302 | subset[1].rgb.b = Get(mode.Blue(i * 2 + 1)); | ||
| 1303 | subset[1].a = (mode.AB > 0) ? Get(mode.Alpha(i * 2 + 1)) : 255; | ||
| 1304 | } | ||
| 1305 | |||
| 1306 | if (mode.SPB > 0) { | ||
| 1307 | auto pbit0 = Get(mode.SharedPBit0()); | ||
| 1308 | auto pbit1 = Get(mode.SharedPBit1()); | ||
| 1309 | subsets[0][0].rgb = (subsets[0][0].rgb << 1) | pbit0; | ||
| 1310 | subsets[0][1].rgb = (subsets[0][1].rgb << 1) | pbit0; | ||
| 1311 | subsets[1][0].rgb = (subsets[1][0].rgb << 1) | pbit1; | ||
| 1312 | subsets[1][1].rgb = (subsets[1][1].rgb << 1) | pbit1; | ||
| 1313 | } | ||
| 1314 | |||
| 1315 | if (mode.EPB > 0) { | ||
| 1316 | for (size_t i = 0; i < mode.NS; i++) { | ||
| 1317 | auto &subset = subsets[i]; | ||
| 1318 | auto pbit0 = Get(mode.EndpointPBit(i * 2 + 0)); | ||
| 1319 | auto pbit1 = Get(mode.EndpointPBit(i * 2 + 1)); | ||
| 1320 | subset[0].rgb = (subset[0].rgb << 1) | pbit0; | ||
| 1321 | subset[1].rgb = (subset[1].rgb << 1) | pbit1; | ||
| 1322 | if (mode.AB > 0) { | ||
| 1323 | subset[0].a = (subset[0].a << 1) | pbit0; | ||
| 1324 | subset[1].a = (subset[1].a << 1) | pbit1; | ||
| 1325 | } | ||
| 1326 | } | ||
| 1327 | } | ||
| 1328 | |||
| 1329 | auto const colorBits = mode.CB + mode.SPB + mode.EPB; | ||
| 1330 | auto const alphaBits = mode.AB + mode.SPB + mode.EPB; | ||
| 1331 | |||
| 1332 | for (size_t i = 0; i < mode.NS; i++) { | ||
| 1333 | auto &subset = subsets[i]; | ||
| 1334 | subset[0].rgb = subset[0].rgb << (8 - colorBits); | ||
| 1335 | subset[1].rgb = subset[1].rgb << (8 - colorBits); | ||
| 1336 | subset[0].rgb = subset[0].rgb | (subset[0].rgb >> colorBits); | ||
| 1337 | subset[1].rgb = subset[1].rgb | (subset[1].rgb >> colorBits); | ||
| 1338 | |||
| 1339 | if (mode.AB > 0) { | ||
| 1340 | subset[0].a = subset[0].a << (8 - alphaBits); | ||
| 1341 | subset[1].a = subset[1].a << (8 - alphaBits); | ||
| 1342 | subset[0].a = subset[0].a | (subset[0].a >> alphaBits); | ||
| 1343 | subset[1].a = subset[1].a | (subset[1].a >> alphaBits); | ||
| 1344 | } | ||
| 1345 | } | ||
| 1346 | |||
| 1347 | int colorIndexBitOffset = 0; | ||
| 1348 | int alphaIndexBitOffset = 0; | ||
| 1349 | for (int y = 0; y < 4; y++) { | ||
| 1350 | for (int x = 0; x < 4; x++) { | ||
| 1351 | auto texelIdx = y * 4 + x; | ||
| 1352 | auto partitionIdx = Get(mode.Partition()); | ||
| 1353 | assert(partitionIdx < MaxPartitions); | ||
| 1354 | auto subsetIdx = subsetIndex(mode, partitionIdx, texelIdx); | ||
| 1355 | assert(subsetIdx < MaxSubsets); | ||
| 1356 | auto const &subset = subsets[subsetIdx]; | ||
| 1357 | |||
| 1358 | auto anchorIdx = anchorIndex(mode, partitionIdx, subsetIdx); | ||
| 1359 | auto isAnchor = anchorIdx == texelIdx; | ||
| 1360 | auto colorIdx = colorIndex(mode, isAnchor, colorIndexBitOffset); | ||
| 1361 | auto alphaIdx = alphaIndex(mode, isAnchor, alphaIndexBitOffset); | ||
| 1362 | |||
| 1363 | if (y + dstY >= dstHeight || x + dstX >= dstWidth) { | ||
| 1364 | // Don't be tempted to skip early at the loops: | ||
| 1365 | // The calls to colorIndex() and alphaIndex() adjust bit | ||
| 1366 | // offsets that need to be carefully tracked. | ||
| 1367 | continue; | ||
| 1368 | } | ||
| 1369 | |||
| 1370 | Color output; | ||
| 1371 | // Note: We flip r and b channels past this point as the texture storage is BGR while the output is RGB | ||
| 1372 | output.rgb.r = interpolate(subset[0].rgb.b, subset[1].rgb.b, colorIdx); | ||
| 1373 | output.rgb.g = interpolate(subset[0].rgb.g, subset[1].rgb.g, colorIdx); | ||
| 1374 | output.rgb.b = interpolate(subset[0].rgb.r, subset[1].rgb.r, colorIdx); | ||
| 1375 | output.a = interpolate(subset[0].a, subset[1].a, alphaIdx); | ||
| 1376 | |||
| 1377 | switch (Get(mode.Rotation())) { | ||
| 1378 | default: | ||
| 1379 | break; | ||
| 1380 | case 1: | ||
| 1381 | std::swap(output.a, output.rgb.b); | ||
| 1382 | break; | ||
| 1383 | case 2: | ||
| 1384 | std::swap(output.a, output.rgb.g); | ||
| 1385 | break; | ||
| 1386 | case 3: | ||
| 1387 | std::swap(output.a, output.rgb.r); | ||
| 1388 | break; | ||
| 1389 | } | ||
| 1390 | |||
| 1391 | auto out = reinterpret_cast<Color *>(dst + sizeof(Color) * x + dstPitch * y); | ||
| 1392 | *out = output; | ||
| 1393 | } | ||
| 1394 | } | ||
| 1395 | } | ||
| 1396 | |||
| 1397 | int subsetIndex(const Mode &mode, int partitionIdx, int texelIndex) const { | ||
| 1398 | switch (mode.NS) { | ||
| 1399 | default: | ||
| 1400 | return 0; | ||
| 1401 | case 2: | ||
| 1402 | return PartitionTable2[partitionIdx][texelIndex]; | ||
| 1403 | case 3: | ||
| 1404 | return PartitionTable3[partitionIdx][texelIndex]; | ||
| 1405 | } | ||
| 1406 | } | ||
| 1407 | |||
| 1408 | int anchorIndex(const Mode &mode, int partitionIdx, int subsetIdx) const { | ||
| 1409 | // ARB_texture_compression_bptc states: | ||
| 1410 | // "In partition zero, the anchor index is always index zero. | ||
| 1411 | // In other partitions, the anchor index is specified by tables | ||
| 1412 | // Table.A2 and Table.A3."" | ||
| 1413 | // Note: This is really confusing - I believe they meant subset instead | ||
| 1414 | // of partition here. | ||
| 1415 | switch (subsetIdx) { | ||
| 1416 | default: | ||
| 1417 | return 0; | ||
| 1418 | case 1: | ||
| 1419 | return mode.NS == 2 ? AnchorTable2[partitionIdx] : AnchorTable3a[partitionIdx]; | ||
| 1420 | case 2: | ||
| 1421 | return AnchorTable3b[partitionIdx]; | ||
| 1422 | } | ||
| 1423 | } | ||
| 1424 | |||
| 1425 | IndexInfo colorIndex(const Mode &mode, bool isAnchor, | ||
| 1426 | int &indexBitOffset) const { | ||
| 1427 | // ARB_texture_compression_bptc states: | ||
| 1428 | // "The index value for interpolating color comes from the secondary | ||
| 1429 | // index for the texel if the format has an index selection bit and its | ||
| 1430 | // value is one and from the primary index otherwise."" | ||
| 1431 | auto idx = Get(mode.IndexSelection()); | ||
| 1432 | assert(idx <= 1); | ||
| 1433 | bool secondary = idx == 1; | ||
| 1434 | auto numBits = secondary ? mode.IB2 : mode.IB; | ||
| 1435 | auto numReadBits = numBits - (isAnchor ? 1 : 0); | ||
| 1436 | auto index = | ||
| 1437 | Get(secondary ? mode.SecondaryIndex(indexBitOffset, numReadBits) | ||
| 1438 | : mode.PrimaryIndex(indexBitOffset, numReadBits)); | ||
| 1439 | indexBitOffset += numReadBits; | ||
| 1440 | return {index, numBits}; | ||
| 1441 | } | ||
| 1442 | |||
| 1443 | IndexInfo alphaIndex(const Mode &mode, bool isAnchor, | ||
| 1444 | int &indexBitOffset) const { | ||
| 1445 | // ARB_texture_compression_bptc states: | ||
| 1446 | // "The alpha index comes from the secondary index if the block has a | ||
| 1447 | // secondary index and the block either doesn't have an index selection | ||
| 1448 | // bit or that bit is zero and the primary index otherwise." | ||
| 1449 | auto idx = Get(mode.IndexSelection()); | ||
| 1450 | assert(idx <= 1); | ||
| 1451 | bool secondary = (mode.IB2 != 0) && (idx == 0); | ||
| 1452 | auto numBits = secondary ? mode.IB2 : mode.IB; | ||
| 1453 | auto numReadBits = numBits - (isAnchor ? 1 : 0); | ||
| 1454 | auto index = | ||
| 1455 | Get(secondary ? mode.SecondaryIndex(indexBitOffset, numReadBits) | ||
| 1456 | : mode.PrimaryIndex(indexBitOffset, numReadBits)); | ||
| 1457 | indexBitOffset += numReadBits; | ||
| 1458 | return {index, numBits}; | ||
| 1459 | } | ||
| 1460 | |||
| 1461 | // Assumes little-endian | ||
| 1462 | uint64_t low; | ||
| 1463 | uint64_t high; | ||
| 1464 | }; | ||
| 1465 | |||
| 1466 | } // namespace BC7 | ||
| 1467 | } // anonymous namespace | ||
| 1468 | |||
| 1469 | namespace bcn { | ||
| 1470 | constexpr size_t R8Bpp{1}; //!< The amount of bytes per pixel in R8 | ||
| 1471 | constexpr size_t R8g8Bpp{2}; //!< The amount of bytes per pixel in R8G8 | ||
| 1472 | constexpr size_t R8g8b8a8Bpp{4}; //!< The amount of bytes per pixel in R8G8B8A8 | ||
| 1473 | constexpr size_t R16g16b16a16Bpp{8}; //!< The amount of bytes per pixel in R16G16B16 | ||
| 1474 | |||
| 1475 | void DecodeBc1(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height) { | ||
| 1476 | const auto *color{reinterpret_cast<const BC_color *>(src)}; | ||
| 1477 | size_t pitch{R8g8b8a8Bpp * width}; | ||
| 1478 | color->decode(dst, x, y, width, height, pitch, R8g8b8a8Bpp, true, false); | ||
| 1479 | } | ||
| 1480 | |||
| 1481 | void DecodeBc2(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height) { | ||
| 1482 | const auto *alpha{reinterpret_cast<const BC_alpha *>(src)}; | ||
| 1483 | const auto *color{reinterpret_cast<const BC_color *>(src + 8)}; | ||
| 1484 | size_t pitch{R8g8b8a8Bpp * width}; | ||
| 1485 | color->decode(dst, x, y, width, height, pitch, R8g8b8a8Bpp, false, true); | ||
| 1486 | alpha->decode(dst, x, y, width, height, pitch, R8g8b8a8Bpp); | ||
| 1487 | } | ||
| 1488 | |||
| 1489 | void DecodeBc3(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height) { | ||
| 1490 | const auto *alpha{reinterpret_cast<const BC_channel *>(src)}; | ||
| 1491 | const auto *color{reinterpret_cast<const BC_color *>(src + 8)}; | ||
| 1492 | size_t pitch{R8g8b8a8Bpp * width}; | ||
| 1493 | color->decode(dst, x, y, width, height, pitch, R8g8b8a8Bpp, false, true); | ||
| 1494 | alpha->decode(dst, x, y, width, height, pitch, R8g8b8a8Bpp, 3, false); | ||
| 1495 | } | ||
| 1496 | |||
| 1497 | void DecodeBc4(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned) { | ||
| 1498 | const auto *red{reinterpret_cast<const BC_channel *>(src)}; | ||
| 1499 | size_t pitch{R8Bpp * width}; | ||
| 1500 | red->decode(dst, x, y, width, height, pitch, R8Bpp, 0, isSigned); | ||
| 1501 | } | ||
| 1502 | |||
| 1503 | void DecodeBc5(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned) { | ||
| 1504 | const auto *red{reinterpret_cast<const BC_channel *>(src)}; | ||
| 1505 | const auto *green{reinterpret_cast<const BC_channel *>(src + 8)}; | ||
| 1506 | size_t pitch{R8g8Bpp * width}; | ||
| 1507 | red->decode(dst, x, y, width, height, pitch, R8g8Bpp, 0, isSigned); | ||
| 1508 | green->decode(dst, x, y, width, height, pitch, R8g8Bpp, 1, isSigned); | ||
| 1509 | } | ||
| 1510 | |||
| 1511 | void DecodeBc6(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned) { | ||
| 1512 | const auto *block{reinterpret_cast<const BC6H::Block *>(src)}; | ||
| 1513 | size_t pitch{R16g16b16a16Bpp * width}; | ||
| 1514 | block->decode(dst, x, y, width, height, pitch, R16g16b16a16Bpp, isSigned); | ||
| 1515 | } | ||
| 1516 | |||
| 1517 | void DecodeBc7(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height) { | ||
| 1518 | const auto *block{reinterpret_cast<const BC7::Block *>(src)}; | ||
| 1519 | size_t pitch{R8g8b8a8Bpp * width}; | ||
| 1520 | block->decode(dst, x, y, width, height, pitch); | ||
| 1521 | } | ||
| 1522 | } | ||
diff --git a/externals/bc_decoder/bc_decoder.h b/externals/bc_decoder/bc_decoder.h new file mode 100644 index 000000000..4f0ead7d3 --- /dev/null +++ b/externals/bc_decoder/bc_decoder.h | |||
| @@ -0,0 +1,43 @@ | |||
| 1 | // SPDX-License-Identifier: MPL-2.0 | ||
| 2 | // Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
#include <cstddef> // size_t is used by every declaration below
#include <cstdint>

// Entry points of the BCn software decoder.
//
// Parameters shared by every function:
//   src    - encoded input data; the implementation reads its block structure(s) from this address
//   dst    - destination buffer receiving the decoded texels
//   x, y   - position forwarded to the block decoder
//            (NOTE(review): presumably the texel coordinates of the block inside the destination image - confirm)
//   width  - destination image width; the row pitch is derived as bytes-per-pixel * width
//   height - destination image height
// The signed-capable formats (BC4, BC5, BC6) additionally take:
//   isSigned - forwarded to the block decoder to select the signed variant of the format
namespace bcn {
    /**
     * @brief Decodes BC1 encoded data to R8G8B8A8
     */
    void DecodeBc1(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height);

    /**
     * @brief Decodes BC2 encoded data to R8G8B8A8
     */
    void DecodeBc2(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height);

    /**
     * @brief Decodes BC3 encoded data to R8G8B8A8
     */
    void DecodeBc3(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height);

    /**
     * @brief Decodes BC4 encoded data to R8
     */
    void DecodeBc4(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);

    /**
     * @brief Decodes BC5 encoded data to R8G8
     */
    void DecodeBc5(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);

    /**
     * @brief Decodes BC6 encoded data to R16G16B16A16
     */
    void DecodeBc6(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);

    /**
     * @brief Decodes BC7 encoded data to R8G8B8A8
     */
    void DecodeBc7(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height);
}
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index e9e6f278d..3b2fe01da 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -220,8 +220,8 @@ add_library(video_core STATIC | |||
| 220 | surface.h | 220 | surface.h |
| 221 | texture_cache/accelerated_swizzle.cpp | 221 | texture_cache/accelerated_swizzle.cpp |
| 222 | texture_cache/accelerated_swizzle.h | 222 | texture_cache/accelerated_swizzle.h |
| 223 | texture_cache/decode_bc4.cpp | 223 | texture_cache/decode_bc.cpp |
| 224 | texture_cache/decode_bc4.h | 224 | texture_cache/decode_bc.h |
| 225 | texture_cache/descriptor_table.h | 225 | texture_cache/descriptor_table.h |
| 226 | texture_cache/formatter.cpp | 226 | texture_cache/formatter.cpp |
| 227 | texture_cache/formatter.h | 227 | texture_cache/formatter.h |
| @@ -279,7 +279,7 @@ add_library(video_core STATIC | |||
| 279 | create_target_directory_groups(video_core) | 279 | create_target_directory_groups(video_core) |
| 280 | 280 | ||
| 281 | target_link_libraries(video_core PUBLIC common core) | 281 | target_link_libraries(video_core PUBLIC common core) |
| 282 | target_link_libraries(video_core PUBLIC glad shader_recompiler stb) | 282 | target_link_libraries(video_core PUBLIC glad shader_recompiler stb bc_decoder) |
| 283 | 283 | ||
| 284 | if (YUZU_USE_BUNDLED_FFMPEG AND NOT (WIN32 OR ANDROID)) | 284 | if (YUZU_USE_BUNDLED_FFMPEG AND NOT (WIN32 OR ANDROID)) |
| 285 | add_dependencies(video_core ffmpeg-build) | 285 | add_dependencies(video_core ffmpeg-build) |
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index cf2964a3f..28d4b15a0 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp | |||
| @@ -495,6 +495,9 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, | |||
| 495 | const Region2D& dst_region, const Region2D& src_region, | 495 | const Region2D& dst_region, const Region2D& src_region, |
| 496 | Tegra::Engines::Fermi2D::Filter filter, | 496 | Tegra::Engines::Fermi2D::Filter filter, |
| 497 | Tegra::Engines::Fermi2D::Operation operation) { | 497 | Tegra::Engines::Fermi2D::Operation operation) { |
| 498 | if (!device.IsExtShaderStencilExportSupported()) { | ||
| 499 | return; | ||
| 500 | } | ||
| 498 | ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point); | 501 | ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point); |
| 499 | ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy); | 502 | ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy); |
| 500 | const BlitImagePipelineKey key{ | 503 | const BlitImagePipelineKey key{ |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 9a0b10568..a8540339d 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -259,6 +259,26 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with | |||
| 259 | break; | 259 | break; |
| 260 | } | 260 | } |
| 261 | } | 261 | } |
| 262 | // Transcode on hardware that doesn't support BCn natively | ||
| 263 | if (!device.IsOptimalBcnSupported() && VideoCore::Surface::IsPixelFormatBCn(pixel_format)) { | ||
| 264 | const bool is_srgb = with_srgb && VideoCore::Surface::IsPixelFormatSRGB(pixel_format); | ||
| 265 | if (pixel_format == PixelFormat::BC4_SNORM) { | ||
| 266 | tuple.format = VK_FORMAT_R8_SNORM; | ||
| 267 | } else if (pixel_format == PixelFormat::BC4_UNORM) { | ||
| 268 | tuple.format = VK_FORMAT_R8_UNORM; | ||
| 269 | } else if (pixel_format == PixelFormat::BC5_SNORM) { | ||
| 270 | tuple.format = VK_FORMAT_R8G8_SNORM; | ||
| 271 | } else if (pixel_format == PixelFormat::BC5_UNORM) { | ||
| 272 | tuple.format = VK_FORMAT_R8G8_UNORM; | ||
| 273 | } else if (pixel_format == PixelFormat::BC6H_SFLOAT || | ||
| 274 | pixel_format == PixelFormat::BC6H_UFLOAT) { | ||
| 275 | tuple.format = VK_FORMAT_R16G16B16A16_SFLOAT; | ||
| 276 | } else if (is_srgb) { | ||
| 277 | tuple.format = VK_FORMAT_A8B8G8R8_SRGB_PACK32; | ||
| 278 | } else { | ||
| 279 | tuple.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32; | ||
| 280 | } | ||
| 281 | } | ||
| 262 | const bool attachable = (tuple.usage & Attachable) != 0; | 282 | const bool attachable = (tuple.usage & Attachable) != 0; |
| 263 | const bool storage = (tuple.usage & Storage) != 0; | 283 | const bool storage = (tuple.usage & Storage) != 0; |
| 264 | 284 | ||
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index ddf28ca28..454bb66a4 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include <fmt/format.h> | 12 | #include <fmt/format.h> |
| 13 | 13 | ||
| 14 | #include "common/logging/log.h" | 14 | #include "common/logging/log.h" |
| 15 | #include "common/polyfill_ranges.h" | ||
| 15 | #include "common/scope_exit.h" | 16 | #include "common/scope_exit.h" |
| 16 | #include "common/settings.h" | 17 | #include "common/settings.h" |
| 17 | #include "common/telemetry.h" | 18 | #include "common/telemetry.h" |
| @@ -65,6 +66,21 @@ std::string BuildCommaSeparatedExtensions( | |||
| 65 | return fmt::format("{}", fmt::join(available_extensions, ",")); | 66 | return fmt::format("{}", fmt::join(available_extensions, ",")); |
| 66 | } | 67 | } |
| 67 | 68 | ||
| 69 | DebugCallback MakeDebugCallback(const vk::Instance& instance, const vk::InstanceDispatch& dld) { | ||
| 70 | if (!Settings::values.renderer_debug) { | ||
| 71 | return DebugCallback{}; | ||
| 72 | } | ||
| 73 | const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld); | ||
| 74 | const auto it = std::ranges::find_if(*properties, [](const auto& prop) { | ||
| 75 | return std::strcmp(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, prop.extensionName) == 0; | ||
| 76 | }); | ||
| 77 | if (it != properties->end()) { | ||
| 78 | return CreateDebugUtilsCallback(instance); | ||
| 79 | } else { | ||
| 80 | return CreateDebugReportCallback(instance); | ||
| 81 | } | ||
| 82 | } | ||
| 83 | |||
| 68 | } // Anonymous namespace | 84 | } // Anonymous namespace |
| 69 | 85 | ||
| 70 | Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, | 86 | Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, |
| @@ -87,7 +103,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, | |||
| 87 | cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary(context.get())), | 103 | cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary(context.get())), |
| 88 | instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, | 104 | instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, |
| 89 | Settings::values.renderer_debug.GetValue())), | 105 | Settings::values.renderer_debug.GetValue())), |
| 90 | debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), | 106 | debug_callback(MakeDebugCallback(instance, dld)), |
| 91 | surface(CreateSurface(instance, render_window.GetWindowInfo())), | 107 | surface(CreateSurface(instance, render_window.GetWindowInfo())), |
| 92 | device(CreateDevice(instance, dld, *surface)), memory_allocator(device), state_tracker(), | 108 | device(CreateDevice(instance, dld, *surface)), memory_allocator(device), state_tracker(), |
| 93 | scheduler(device, state_tracker), | 109 | scheduler(device, state_tracker), |
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index b2e8cbd1b..ca22c0baa 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | 5 | ||
| 6 | #include <memory> | 6 | #include <memory> |
| 7 | #include <string> | 7 | #include <string> |
| 8 | #include <variant> | ||
| 8 | 9 | ||
| 9 | #include "common/dynamic_library.h" | 10 | #include "common/dynamic_library.h" |
| 10 | #include "video_core/renderer_base.h" | 11 | #include "video_core/renderer_base.h" |
| @@ -33,6 +34,8 @@ class GPU; | |||
| 33 | 34 | ||
| 34 | namespace Vulkan { | 35 | namespace Vulkan { |
| 35 | 36 | ||
| 37 | using DebugCallback = std::variant<vk::DebugUtilsMessenger, vk::DebugReportCallback>; | ||
| 38 | |||
| 36 | Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, | 39 | Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, |
| 37 | VkSurfaceKHR surface); | 40 | VkSurfaceKHR surface); |
| 38 | 41 | ||
| @@ -71,7 +74,7 @@ private: | |||
| 71 | vk::InstanceDispatch dld; | 74 | vk::InstanceDispatch dld; |
| 72 | 75 | ||
| 73 | vk::Instance instance; | 76 | vk::Instance instance; |
| 74 | vk::DebugUtilsMessenger debug_callback; | 77 | DebugCallback debug_callback; |
| 75 | vk::SurfaceKHR surface; | 78 | vk::SurfaceKHR surface; |
| 76 | 79 | ||
| 77 | ScreenInfo screen_info; | 80 | ScreenInfo screen_info; |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 660f7c9ff..b72f95235 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -590,7 +590,8 @@ void BufferCacheRuntime::ReserveNullBuffer() { | |||
| 590 | .pNext = nullptr, | 590 | .pNext = nullptr, |
| 591 | .flags = 0, | 591 | .flags = 0, |
| 592 | .size = 4, | 592 | .size = 4, |
| 593 | .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | 593 | .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | |
| 594 | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | ||
| 594 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 595 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 595 | .queueFamilyIndexCount = 0, | 596 | .queueFamilyIndexCount = 0, |
| 596 | .pQueueFamilyIndices = nullptr, | 597 | .pQueueFamilyIndices = nullptr, |
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index c1595642e..ad35cacac 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | |||
| @@ -652,13 +652,14 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { | |||
| 652 | .pNext = nullptr, | 652 | .pNext = nullptr, |
| 653 | .negativeOneToOne = key.state.ndc_minus_one_to_one.Value() != 0 ? VK_TRUE : VK_FALSE, | 653 | .negativeOneToOne = key.state.ndc_minus_one_to_one.Value() != 0 ? VK_TRUE : VK_FALSE, |
| 654 | }; | 654 | }; |
| 655 | const u32 num_viewports = std::min<u32>(device.GetMaxViewports(), Maxwell::NumViewports); | ||
| 655 | VkPipelineViewportStateCreateInfo viewport_ci{ | 656 | VkPipelineViewportStateCreateInfo viewport_ci{ |
| 656 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, | 657 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, |
| 657 | .pNext = nullptr, | 658 | .pNext = nullptr, |
| 658 | .flags = 0, | 659 | .flags = 0, |
| 659 | .viewportCount = Maxwell::NumViewports, | 660 | .viewportCount = num_viewports, |
| 660 | .pViewports = nullptr, | 661 | .pViewports = nullptr, |
| 661 | .scissorCount = Maxwell::NumViewports, | 662 | .scissorCount = num_viewports, |
| 662 | .pScissors = nullptr, | 663 | .pScissors = nullptr, |
| 663 | }; | 664 | }; |
| 664 | if (device.IsNvViewportSwizzleSupported()) { | 665 | if (device.IsNvViewportSwizzleSupported()) { |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 9f316113c..d600c4e61 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -309,7 +309,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device | |||
| 309 | .support_int16 = device.IsShaderInt16Supported(), | 309 | .support_int16 = device.IsShaderInt16Supported(), |
| 310 | .support_int64 = device.IsShaderInt64Supported(), | 310 | .support_int64 = device.IsShaderInt64Supported(), |
| 311 | .support_vertex_instance_id = false, | 311 | .support_vertex_instance_id = false, |
| 312 | .support_float_controls = true, | 312 | .support_float_controls = device.IsKhrShaderFloatControlsSupported(), |
| 313 | .support_separate_denorm_behavior = | 313 | .support_separate_denorm_behavior = |
| 314 | float_control.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL, | 314 | float_control.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL, |
| 315 | .support_separate_rounding_mode = | 315 | .support_separate_rounding_mode = |
| @@ -325,12 +325,13 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device | |||
| 325 | .support_fp64_signed_zero_nan_preserve = | 325 | .support_fp64_signed_zero_nan_preserve = |
| 326 | float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, | 326 | float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, |
| 327 | .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), | 327 | .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), |
| 328 | .support_vote = true, | 328 | .support_vote = device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT), |
| 329 | .support_viewport_index_layer_non_geometry = | 329 | .support_viewport_index_layer_non_geometry = |
| 330 | device.IsExtShaderViewportIndexLayerSupported(), | 330 | device.IsExtShaderViewportIndexLayerSupported(), |
| 331 | .support_viewport_mask = device.IsNvViewportArray2Supported(), | 331 | .support_viewport_mask = device.IsNvViewportArray2Supported(), |
| 332 | .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), | 332 | .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), |
| 333 | .support_demote_to_helper_invocation = true, | 333 | .support_demote_to_helper_invocation = |
| 334 | device.IsExtShaderDemoteToHelperInvocationSupported(), | ||
| 334 | .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), | 335 | .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), |
| 335 | .support_derivative_control = true, | 336 | .support_derivative_control = true, |
| 336 | .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), | 337 | .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 84e3a30cc..f7c0d939a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -315,7 +315,14 @@ void RasterizerVulkan::Clear(u32 layer_count) { | |||
| 315 | FlushWork(); | 315 | FlushWork(); |
| 316 | gpu_memory->FlushCaching(); | 316 | gpu_memory->FlushCaching(); |
| 317 | 317 | ||
| 318 | #if ANDROID | ||
| 319 | if (Settings::IsGPULevelHigh()) { | ||
| 320 | // This is problematic on Android, disable on GPU Normal. | ||
| 321 | query_cache.UpdateCounters(); | ||
| 322 | } | ||
| 323 | #else | ||
| 318 | query_cache.UpdateCounters(); | 324 | query_cache.UpdateCounters(); |
| 325 | #endif | ||
| 319 | 326 | ||
| 320 | auto& regs = maxwell3d->regs; | 327 | auto& regs = maxwell3d->regs; |
| 321 | const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B || | 328 | const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B || |
| @@ -925,7 +932,7 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg | |||
| 925 | } | 932 | } |
| 926 | const bool is_rescaling{texture_cache.IsRescaling()}; | 933 | const bool is_rescaling{texture_cache.IsRescaling()}; |
| 927 | const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f; | 934 | const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f; |
| 928 | const std::array viewports{ | 935 | const std::array viewport_list{ |
| 929 | GetViewportState(device, regs, 0, scale), GetViewportState(device, regs, 1, scale), | 936 | GetViewportState(device, regs, 0, scale), GetViewportState(device, regs, 1, scale), |
| 930 | GetViewportState(device, regs, 2, scale), GetViewportState(device, regs, 3, scale), | 937 | GetViewportState(device, regs, 2, scale), GetViewportState(device, regs, 3, scale), |
| 931 | GetViewportState(device, regs, 4, scale), GetViewportState(device, regs, 5, scale), | 938 | GetViewportState(device, regs, 4, scale), GetViewportState(device, regs, 5, scale), |
| @@ -935,7 +942,11 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg | |||
| 935 | GetViewportState(device, regs, 12, scale), GetViewportState(device, regs, 13, scale), | 942 | GetViewportState(device, regs, 12, scale), GetViewportState(device, regs, 13, scale), |
| 936 | GetViewportState(device, regs, 14, scale), GetViewportState(device, regs, 15, scale), | 943 | GetViewportState(device, regs, 14, scale), GetViewportState(device, regs, 15, scale), |
| 937 | }; | 944 | }; |
| 938 | scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); }); | 945 | scheduler.Record([this, viewport_list](vk::CommandBuffer cmdbuf) { |
| 946 | const u32 num_viewports = std::min<u32>(device.GetMaxViewports(), Maxwell::NumViewports); | ||
| 947 | const vk::Span<VkViewport> viewports(viewport_list.data(), num_viewports); | ||
| 948 | cmdbuf.SetViewport(0, viewports); | ||
| 949 | }); | ||
| 939 | } | 950 | } |
| 940 | 951 | ||
| 941 | void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs) { | 952 | void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs) { |
| @@ -948,7 +959,7 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs | |||
| 948 | up_scale = Settings::values.resolution_info.up_scale; | 959 | up_scale = Settings::values.resolution_info.up_scale; |
| 949 | down_shift = Settings::values.resolution_info.down_shift; | 960 | down_shift = Settings::values.resolution_info.down_shift; |
| 950 | } | 961 | } |
| 951 | const std::array scissors{ | 962 | const std::array scissor_list{ |
| 952 | GetScissorState(regs, 0, up_scale, down_shift), | 963 | GetScissorState(regs, 0, up_scale, down_shift), |
| 953 | GetScissorState(regs, 1, up_scale, down_shift), | 964 | GetScissorState(regs, 1, up_scale, down_shift), |
| 954 | GetScissorState(regs, 2, up_scale, down_shift), | 965 | GetScissorState(regs, 2, up_scale, down_shift), |
| @@ -966,7 +977,11 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs | |||
| 966 | GetScissorState(regs, 14, up_scale, down_shift), | 977 | GetScissorState(regs, 14, up_scale, down_shift), |
| 967 | GetScissorState(regs, 15, up_scale, down_shift), | 978 | GetScissorState(regs, 15, up_scale, down_shift), |
| 968 | }; | 979 | }; |
| 969 | scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); }); | 980 | scheduler.Record([this, scissor_list](vk::CommandBuffer cmdbuf) { |
| 981 | const u32 num_scissors = std::min<u32>(device.GetMaxViewports(), Maxwell::NumViewports); | ||
| 982 | const vk::Span<VkRect2D> scissors(scissor_list.data(), num_scissors); | ||
| 983 | cmdbuf.SetScissor(0, scissors); | ||
| 984 | }); | ||
| 970 | } | 985 | } |
| 971 | 986 | ||
| 972 | void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) { | 987 | void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) { |
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 62b251a9b..ce92f66ab 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | |||
| @@ -38,18 +38,20 @@ size_t Region(size_t iterator) noexcept { | |||
| 38 | StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, | 38 | StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, |
| 39 | Scheduler& scheduler_) | 39 | Scheduler& scheduler_) |
| 40 | : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { | 40 | : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { |
| 41 | const VkBufferCreateInfo stream_ci = { | 41 | VkBufferCreateInfo stream_ci = { |
| 42 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 42 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 43 | .pNext = nullptr, | 43 | .pNext = nullptr, |
| 44 | .flags = 0, | 44 | .flags = 0, |
| 45 | .size = STREAM_BUFFER_SIZE, | 45 | .size = STREAM_BUFFER_SIZE, |
| 46 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | | 46 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | |
| 47 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | | 47 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, |
| 48 | VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT, | ||
| 49 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 48 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 50 | .queueFamilyIndexCount = 0, | 49 | .queueFamilyIndexCount = 0, |
| 51 | .pQueueFamilyIndices = nullptr, | 50 | .pQueueFamilyIndices = nullptr, |
| 52 | }; | 51 | }; |
| 52 | if (device.IsExtTransformFeedbackSupported()) { | ||
| 53 | stream_ci.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; | ||
| 54 | } | ||
| 53 | stream_buffer = memory_allocator.CreateBuffer(stream_ci, MemoryUsage::Stream); | 55 | stream_buffer = memory_allocator.CreateBuffer(stream_ci, MemoryUsage::Stream); |
| 54 | if (device.HasDebuggingToolAttached()) { | 56 | if (device.HasDebuggingToolAttached()) { |
| 55 | stream_buffer.SetObjectNameEXT("Stream Buffer"); | 57 | stream_buffer.SetObjectNameEXT("Stream Buffer"); |
| @@ -164,19 +166,21 @@ std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t s | |||
| 164 | StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage, | 166 | StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage, |
| 165 | bool deferred) { | 167 | bool deferred) { |
| 166 | const u32 log2 = Common::Log2Ceil64(size); | 168 | const u32 log2 = Common::Log2Ceil64(size); |
| 167 | const VkBufferCreateInfo buffer_ci = { | 169 | VkBufferCreateInfo buffer_ci = { |
| 168 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 170 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 169 | .pNext = nullptr, | 171 | .pNext = nullptr, |
| 170 | .flags = 0, | 172 | .flags = 0, |
| 171 | .size = 1ULL << log2, | 173 | .size = 1ULL << log2, |
| 172 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | | 174 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | |
| 173 | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | | 175 | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | |
| 174 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | | 176 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, |
| 175 | VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT, | ||
| 176 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 177 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 177 | .queueFamilyIndexCount = 0, | 178 | .queueFamilyIndexCount = 0, |
| 178 | .pQueueFamilyIndices = nullptr, | 179 | .pQueueFamilyIndices = nullptr, |
| 179 | }; | 180 | }; |
| 181 | if (device.IsExtTransformFeedbackSupported()) { | ||
| 182 | buffer_ci.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; | ||
| 183 | } | ||
| 180 | vk::Buffer buffer = memory_allocator.CreateBuffer(buffer_ci, usage); | 184 | vk::Buffer buffer = memory_allocator.CreateBuffer(buffer_ci, usage); |
| 181 | if (device.HasDebuggingToolAttached()) { | 185 | if (device.HasDebuggingToolAttached()) { |
| 182 | ++buffer_index; | 186 | ++buffer_index; |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index ce6acc30c..8385b5509 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -1279,6 +1279,10 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu | |||
| 1279 | flags |= VideoCommon::ImageFlagBits::Converted; | 1279 | flags |= VideoCommon::ImageFlagBits::Converted; |
| 1280 | flags |= VideoCommon::ImageFlagBits::CostlyLoad; | 1280 | flags |= VideoCommon::ImageFlagBits::CostlyLoad; |
| 1281 | } | 1281 | } |
| 1282 | if (IsPixelFormatBCn(info.format) && !runtime->device.IsOptimalBcnSupported()) { | ||
| 1283 | flags |= VideoCommon::ImageFlagBits::Converted; | ||
| 1284 | flags |= VideoCommon::ImageFlagBits::CostlyLoad; | ||
| 1285 | } | ||
| 1282 | if (runtime->device.HasDebuggingToolAttached()) { | 1286 | if (runtime->device.HasDebuggingToolAttached()) { |
| 1283 | original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); | 1287 | original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); |
| 1284 | } | 1288 | } |
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index cb51529e4..e16cd5e73 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp | |||
| @@ -269,6 +269,28 @@ bool IsPixelFormatASTC(PixelFormat format) { | |||
| 269 | } | 269 | } |
| 270 | } | 270 | } |
| 271 | 271 | ||
| 272 | bool IsPixelFormatBCn(PixelFormat format) { | ||
| 273 | switch (format) { | ||
| 274 | case PixelFormat::BC1_RGBA_UNORM: | ||
| 275 | case PixelFormat::BC2_UNORM: | ||
| 276 | case PixelFormat::BC3_UNORM: | ||
| 277 | case PixelFormat::BC4_UNORM: | ||
| 278 | case PixelFormat::BC4_SNORM: | ||
| 279 | case PixelFormat::BC5_UNORM: | ||
| 280 | case PixelFormat::BC5_SNORM: | ||
| 281 | case PixelFormat::BC1_RGBA_SRGB: | ||
| 282 | case PixelFormat::BC2_SRGB: | ||
| 283 | case PixelFormat::BC3_SRGB: | ||
| 284 | case PixelFormat::BC7_UNORM: | ||
| 285 | case PixelFormat::BC6H_UFLOAT: | ||
| 286 | case PixelFormat::BC6H_SFLOAT: | ||
| 287 | case PixelFormat::BC7_SRGB: | ||
| 288 | return true; | ||
| 289 | default: | ||
| 290 | return false; | ||
| 291 | } | ||
| 292 | } | ||
| 293 | |||
| 272 | bool IsPixelFormatSRGB(PixelFormat format) { | 294 | bool IsPixelFormatSRGB(PixelFormat format) { |
| 273 | switch (format) { | 295 | switch (format) { |
| 274 | case PixelFormat::A8B8G8R8_SRGB: | 296 | case PixelFormat::A8B8G8R8_SRGB: |
diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 0225d3287..9b9c4d9bc 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h | |||
| @@ -501,6 +501,8 @@ SurfaceType GetFormatType(PixelFormat pixel_format); | |||
| 501 | 501 | ||
| 502 | bool IsPixelFormatASTC(PixelFormat format); | 502 | bool IsPixelFormatASTC(PixelFormat format); |
| 503 | 503 | ||
| 504 | bool IsPixelFormatBCn(PixelFormat format); | ||
| 505 | |||
| 504 | bool IsPixelFormatSRGB(PixelFormat format); | 506 | bool IsPixelFormatSRGB(PixelFormat format); |
| 505 | 507 | ||
| 506 | bool IsPixelFormatInteger(PixelFormat format); | 508 | bool IsPixelFormatInteger(PixelFormat format); |
diff --git a/src/video_core/texture_cache/decode_bc.cpp b/src/video_core/texture_cache/decode_bc.cpp new file mode 100644 index 000000000..3e26474a3 --- /dev/null +++ b/src/video_core/texture_cache/decode_bc.cpp | |||
| @@ -0,0 +1,129 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <algorithm> | ||
| 5 | #include <array> | ||
| 6 | #include <span> | ||
| 7 | #include <bc_decoder.h> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/texture_cache/decode_bc.h" | ||
| 11 | |||
| 12 | namespace VideoCommon { | ||
| 13 | |||
| 14 | namespace { | ||
| 15 | constexpr u32 BLOCK_SIZE = 4; | ||
| 16 | |||
| 17 | using VideoCore::Surface::PixelFormat; | ||
| 18 | |||
| 19 | constexpr bool IsSigned(PixelFormat pixel_format) { | ||
| 20 | switch (pixel_format) { | ||
| 21 | case PixelFormat::BC4_SNORM: | ||
| 22 | case PixelFormat::BC4_UNORM: | ||
| 23 | case PixelFormat::BC5_SNORM: | ||
| 24 | case PixelFormat::BC5_UNORM: | ||
| 25 | case PixelFormat::BC6H_SFLOAT: | ||
| 26 | case PixelFormat::BC6H_UFLOAT: | ||
| 27 | return true; | ||
| 28 | default: | ||
| 29 | return false; | ||
| 30 | } | ||
| 31 | } | ||
| 32 | |||
| 33 | constexpr u32 BlockSize(PixelFormat pixel_format) { | ||
| 34 | switch (pixel_format) { | ||
| 35 | case PixelFormat::BC1_RGBA_SRGB: | ||
| 36 | case PixelFormat::BC1_RGBA_UNORM: | ||
| 37 | case PixelFormat::BC4_SNORM: | ||
| 38 | case PixelFormat::BC4_UNORM: | ||
| 39 | return 8; | ||
| 40 | default: | ||
| 41 | return 16; | ||
| 42 | } | ||
| 43 | } | ||
| 44 | } // Anonymous namespace | ||
| 45 | |||
| 46 | u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format) { | ||
| 47 | switch (pixel_format) { | ||
| 48 | case PixelFormat::BC4_SNORM: | ||
| 49 | case PixelFormat::BC4_UNORM: | ||
| 50 | return 1; | ||
| 51 | case PixelFormat::BC5_SNORM: | ||
| 52 | case PixelFormat::BC5_UNORM: | ||
| 53 | return 2; | ||
| 54 | case PixelFormat::BC6H_SFLOAT: | ||
| 55 | case PixelFormat::BC6H_UFLOAT: | ||
| 56 | return 8; | ||
| 57 | default: | ||
| 58 | return 4; | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
| 62 | template <auto decompress, PixelFormat pixel_format> | ||
| 63 | void DecompressBlocks(std::span<const u8> input, std::span<u8> output, Extent3D extent, | ||
| 64 | bool is_signed = false) { | ||
| 65 | const u32 out_bpp = ConvertedBytesPerBlock(pixel_format); | ||
| 66 | const u32 block_width = std::min(extent.width, BLOCK_SIZE); | ||
| 67 | const u32 block_height = std::min(extent.height, BLOCK_SIZE); | ||
| 68 | const u32 pitch = extent.width * out_bpp; | ||
| 69 | size_t input_offset = 0; | ||
| 70 | size_t output_offset = 0; | ||
| 71 | for (u32 slice = 0; slice < extent.depth; ++slice) { | ||
| 72 | for (u32 y = 0; y < extent.height; y += block_height) { | ||
| 73 | size_t row_offset = 0; | ||
| 74 | for (u32 x = 0; x < extent.width; | ||
| 75 | x += block_width, row_offset += block_width * out_bpp) { | ||
| 76 | const u8* src = input.data() + input_offset; | ||
| 77 | u8* const dst = output.data() + output_offset + row_offset; | ||
| 78 | if constexpr (IsSigned(pixel_format)) { | ||
| 79 | decompress(src, dst, x, y, extent.width, extent.height, is_signed); | ||
| 80 | } else { | ||
| 81 | decompress(src, dst, x, y, extent.width, extent.height); | ||
| 82 | } | ||
| 83 | input_offset += BlockSize(pixel_format); | ||
| 84 | } | ||
| 85 | output_offset += block_height * pitch; | ||
| 86 | } | ||
| 87 | } | ||
| 88 | } | ||
| 89 | |||
| 90 | void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent, | ||
| 91 | VideoCore::Surface::PixelFormat pixel_format) { | ||
| 92 | switch (pixel_format) { | ||
| 93 | case PixelFormat::BC1_RGBA_UNORM: | ||
| 94 | case PixelFormat::BC1_RGBA_SRGB: | ||
| 95 | DecompressBlocks<bcn::DecodeBc1, PixelFormat::BC1_RGBA_UNORM>(input, output, extent); | ||
| 96 | break; | ||
| 97 | case PixelFormat::BC2_UNORM: | ||
| 98 | case PixelFormat::BC2_SRGB: | ||
| 99 | DecompressBlocks<bcn::DecodeBc2, PixelFormat::BC2_UNORM>(input, output, extent); | ||
| 100 | break; | ||
| 101 | case PixelFormat::BC3_UNORM: | ||
| 102 | case PixelFormat::BC3_SRGB: | ||
| 103 | DecompressBlocks<bcn::DecodeBc3, PixelFormat::BC3_UNORM>(input, output, extent); | ||
| 104 | break; | ||
| 105 | case PixelFormat::BC4_SNORM: | ||
| 106 | case PixelFormat::BC4_UNORM: | ||
| 107 | DecompressBlocks<bcn::DecodeBc4, PixelFormat::BC4_UNORM>( | ||
| 108 | input, output, extent, pixel_format == PixelFormat::BC4_SNORM); | ||
| 109 | break; | ||
| 110 | case PixelFormat::BC5_SNORM: | ||
| 111 | case PixelFormat::BC5_UNORM: | ||
| 112 | DecompressBlocks<bcn::DecodeBc5, PixelFormat::BC5_UNORM>( | ||
| 113 | input, output, extent, pixel_format == PixelFormat::BC5_SNORM); | ||
| 114 | break; | ||
| 115 | case PixelFormat::BC6H_SFLOAT: | ||
| 116 | case PixelFormat::BC6H_UFLOAT: | ||
| 117 | DecompressBlocks<bcn::DecodeBc6, PixelFormat::BC6H_UFLOAT>( | ||
| 118 | input, output, extent, pixel_format == PixelFormat::BC6H_SFLOAT); | ||
| 119 | break; | ||
| 120 | case PixelFormat::BC7_SRGB: | ||
| 121 | case PixelFormat::BC7_UNORM: | ||
| 122 | DecompressBlocks<bcn::DecodeBc7, PixelFormat::BC7_UNORM>(input, output, extent); | ||
| 123 | break; | ||
| 124 | default: | ||
| 125 | LOG_WARNING(HW_GPU, "Unimplemented BCn decompression {}", pixel_format); | ||
| 126 | } | ||
| 127 | } | ||
| 128 | |||
| 129 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/decode_bc4.h b/src/video_core/texture_cache/decode_bc.h index ab2f735be..41d1ec0a3 100644 --- a/src/video_core/texture_cache/decode_bc4.h +++ b/src/video_core/texture_cache/decode_bc.h | |||
| @@ -6,10 +6,14 @@ | |||
| 6 | #include <span> | 6 | #include <span> |
| 7 | 7 | ||
| 8 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 9 | #include "video_core/surface.h" | ||
| 9 | #include "video_core/texture_cache/types.h" | 10 | #include "video_core/texture_cache/types.h" |
| 10 | 11 | ||
| 11 | namespace VideoCommon { | 12 | namespace VideoCommon { |
| 12 | 13 | ||
| 13 | void DecompressBC4(std::span<const u8> data, Extent3D extent, std::span<u8> output); | 14 | [[nodiscard]] u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format); |
| 15 | |||
| 16 | void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent, | ||
| 17 | VideoCore::Surface::PixelFormat pixel_format); | ||
| 14 | 18 | ||
| 15 | } // namespace VideoCommon | 19 | } // namespace VideoCommon |
diff --git a/src/video_core/texture_cache/decode_bc4.cpp b/src/video_core/texture_cache/decode_bc4.cpp deleted file mode 100644 index ef98afdca..000000000 --- a/src/video_core/texture_cache/decode_bc4.cpp +++ /dev/null | |||
| @@ -1,96 +0,0 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <algorithm> | ||
| 5 | #include <array> | ||
| 6 | #include <span> | ||
| 7 | |||
| 8 | #include "common/assert.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/texture_cache/decode_bc4.h" | ||
| 11 | #include "video_core/texture_cache/types.h" | ||
| 12 | |||
| 13 | namespace VideoCommon { | ||
| 14 | |||
| 15 | // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt | ||
| 16 | [[nodiscard]] constexpr u32 DecompressBlock(u64 bits, u32 x, u32 y) { | ||
| 17 | const u32 code_offset = 16 + 3 * (4 * y + x); | ||
| 18 | const u32 code = (bits >> code_offset) & 7; | ||
| 19 | const u32 red0 = (bits >> 0) & 0xff; | ||
| 20 | const u32 red1 = (bits >> 8) & 0xff; | ||
| 21 | if (red0 > red1) { | ||
| 22 | switch (code) { | ||
| 23 | case 0: | ||
| 24 | return red0; | ||
| 25 | case 1: | ||
| 26 | return red1; | ||
| 27 | case 2: | ||
| 28 | return (6 * red0 + 1 * red1) / 7; | ||
| 29 | case 3: | ||
| 30 | return (5 * red0 + 2 * red1) / 7; | ||
| 31 | case 4: | ||
| 32 | return (4 * red0 + 3 * red1) / 7; | ||
| 33 | case 5: | ||
| 34 | return (3 * red0 + 4 * red1) / 7; | ||
| 35 | case 6: | ||
| 36 | return (2 * red0 + 5 * red1) / 7; | ||
| 37 | case 7: | ||
| 38 | return (1 * red0 + 6 * red1) / 7; | ||
| 39 | } | ||
| 40 | } else { | ||
| 41 | switch (code) { | ||
| 42 | case 0: | ||
| 43 | return red0; | ||
| 44 | case 1: | ||
| 45 | return red1; | ||
| 46 | case 2: | ||
| 47 | return (4 * red0 + 1 * red1) / 5; | ||
| 48 | case 3: | ||
| 49 | return (3 * red0 + 2 * red1) / 5; | ||
| 50 | case 4: | ||
| 51 | return (2 * red0 + 3 * red1) / 5; | ||
| 52 | case 5: | ||
| 53 | return (1 * red0 + 4 * red1) / 5; | ||
| 54 | case 6: | ||
| 55 | return 0; | ||
| 56 | case 7: | ||
| 57 | return 0xff; | ||
| 58 | } | ||
| 59 | } | ||
| 60 | return 0; | ||
| 61 | } | ||
| 62 | |||
| 63 | void DecompressBC4(std::span<const u8> input, Extent3D extent, std::span<u8> output) { | ||
| 64 | UNIMPLEMENTED_IF_MSG(extent.width % 4 != 0, "Unaligned width={}", extent.width); | ||
| 65 | UNIMPLEMENTED_IF_MSG(extent.height % 4 != 0, "Unaligned height={}", extent.height); | ||
| 66 | static constexpr u32 BLOCK_SIZE = 4; | ||
| 67 | size_t input_offset = 0; | ||
| 68 | for (u32 slice = 0; slice < extent.depth; ++slice) { | ||
| 69 | for (u32 block_y = 0; block_y < extent.height / 4; ++block_y) { | ||
| 70 | for (u32 block_x = 0; block_x < extent.width / 4; ++block_x) { | ||
| 71 | u64 bits; | ||
| 72 | std::memcpy(&bits, &input[input_offset], sizeof(bits)); | ||
| 73 | input_offset += sizeof(bits); | ||
| 74 | |||
| 75 | for (u32 y = 0; y < BLOCK_SIZE; ++y) { | ||
| 76 | for (u32 x = 0; x < BLOCK_SIZE; ++x) { | ||
| 77 | const u32 linear_z = slice; | ||
| 78 | const u32 linear_y = block_y * BLOCK_SIZE + y; | ||
| 79 | const u32 linear_x = block_x * BLOCK_SIZE + x; | ||
| 80 | const u32 offset_z = linear_z * extent.width * extent.height; | ||
| 81 | const u32 offset_y = linear_y * extent.width; | ||
| 82 | const u32 offset_x = linear_x; | ||
| 83 | const u32 output_offset = (offset_z + offset_y + offset_x) * 4ULL; | ||
| 84 | const u32 color = DecompressBlock(bits, x, y); | ||
| 85 | output[output_offset + 0] = static_cast<u8>(color); | ||
| 86 | output[output_offset + 1] = 0; | ||
| 87 | output[output_offset + 2] = 0; | ||
| 88 | output[output_offset + 3] = 0xff; | ||
| 89 | } | ||
| 90 | } | ||
| 91 | } | ||
| 92 | } | ||
| 93 | } | ||
| 94 | } | ||
| 95 | |||
| 96 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index f781cb7a0..9a618a57a 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp | |||
| @@ -24,7 +24,7 @@ | |||
| 24 | #include "video_core/engines/maxwell_3d.h" | 24 | #include "video_core/engines/maxwell_3d.h" |
| 25 | #include "video_core/memory_manager.h" | 25 | #include "video_core/memory_manager.h" |
| 26 | #include "video_core/surface.h" | 26 | #include "video_core/surface.h" |
| 27 | #include "video_core/texture_cache/decode_bc4.h" | 27 | #include "video_core/texture_cache/decode_bc.h" |
| 28 | #include "video_core/texture_cache/format_lookup_table.h" | 28 | #include "video_core/texture_cache/format_lookup_table.h" |
| 29 | #include "video_core/texture_cache/formatter.h" | 29 | #include "video_core/texture_cache/formatter.h" |
| 30 | #include "video_core/texture_cache/samples_helper.h" | 30 | #include "video_core/texture_cache/samples_helper.h" |
| @@ -61,8 +61,6 @@ using VideoCore::Surface::PixelFormatFromDepthFormat; | |||
| 61 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | 61 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; |
| 62 | using VideoCore::Surface::SurfaceType; | 62 | using VideoCore::Surface::SurfaceType; |
| 63 | 63 | ||
| 64 | constexpr u32 CONVERTED_BYTES_PER_BLOCK = BytesPerBlock(PixelFormat::A8B8G8R8_UNORM); | ||
| 65 | |||
| 66 | struct LevelInfo { | 64 | struct LevelInfo { |
| 67 | Extent3D size; | 65 | Extent3D size; |
| 68 | Extent3D block; | 66 | Extent3D block; |
| @@ -612,7 +610,8 @@ u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept { | |||
| 612 | } | 610 | } |
| 613 | return output_size; | 611 | return output_size; |
| 614 | } | 612 | } |
| 615 | return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK; | 613 | return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * |
| 614 | ConvertedBytesPerBlock(info.format); | ||
| 616 | } | 615 | } |
| 617 | 616 | ||
| 618 | u32 CalculateLayerStride(const ImageInfo& info) noexcept { | 617 | u32 CalculateLayerStride(const ImageInfo& info) noexcept { |
| @@ -945,7 +944,8 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 | |||
| 945 | tile_size.height, output.subspan(output_offset)); | 944 | tile_size.height, output.subspan(output_offset)); |
| 946 | 945 | ||
| 947 | output_offset += copy.image_extent.width * copy.image_extent.height * | 946 | output_offset += copy.image_extent.width * copy.image_extent.height * |
| 948 | copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK; | 947 | copy.image_subresource.num_layers * |
| 948 | BytesPerBlock(PixelFormat::A8B8G8R8_UNORM); | ||
| 949 | } else if (astc) { | 949 | } else if (astc) { |
| 950 | // BC1 uses 0.5 bytes per texel | 950 | // BC1 uses 0.5 bytes per texel |
| 951 | // BC3 uses 1 byte per texel | 951 | // BC3 uses 1 byte per texel |
| @@ -956,7 +956,8 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 | |||
| 956 | 956 | ||
| 957 | const u32 plane_dim = copy.image_extent.width * copy.image_extent.height; | 957 | const u32 plane_dim = copy.image_extent.width * copy.image_extent.height; |
| 958 | const u32 level_size = plane_dim * copy.image_extent.depth * | 958 | const u32 level_size = plane_dim * copy.image_extent.depth * |
| 959 | copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK; | 959 | copy.image_subresource.num_layers * |
| 960 | BytesPerBlock(PixelFormat::A8B8G8R8_UNORM); | ||
| 960 | decode_scratch.resize_destructive(level_size); | 961 | decode_scratch.resize_destructive(level_size); |
| 961 | 962 | ||
| 962 | Tegra::Texture::ASTC::Decompress( | 963 | Tegra::Texture::ASTC::Decompress( |
| @@ -976,10 +977,15 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 | |||
| 976 | bpp_div; | 977 | bpp_div; |
| 977 | output_offset += static_cast<u32>(copy.buffer_size); | 978 | output_offset += static_cast<u32>(copy.buffer_size); |
| 978 | } else { | 979 | } else { |
| 979 | DecompressBC4(input_offset, copy.image_extent, output.subspan(output_offset)); | 980 | const Extent3D image_extent{ |
| 980 | 981 | .width = copy.image_extent.width, | |
| 982 | .height = copy.image_extent.height * copy.image_subresource.num_layers, | ||
| 983 | .depth = copy.image_extent.depth, | ||
| 984 | }; | ||
| 985 | DecompressBCn(input_offset, output.subspan(output_offset), image_extent, info.format); | ||
| 981 | output_offset += copy.image_extent.width * copy.image_extent.height * | 986 | output_offset += copy.image_extent.width * copy.image_extent.height * |
| 982 | copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK; | 987 | copy.image_subresource.num_layers * |
| 988 | ConvertedBytesPerBlock(info.format); | ||
| 983 | } | 989 | } |
| 984 | } | 990 | } |
| 985 | } | 991 | } |
diff --git a/src/video_core/textures/bcn.cpp b/src/video_core/textures/bcn.cpp index 671212a49..16ddbe320 100644 --- a/src/video_core/textures/bcn.cpp +++ b/src/video_core/textures/bcn.cpp | |||
| @@ -3,7 +3,6 @@ | |||
| 3 | 3 | ||
| 4 | #include <stb_dxt.h> | 4 | #include <stb_dxt.h> |
| 5 | #include <string.h> | 5 | #include <string.h> |
| 6 | |||
| 7 | #include "common/alignment.h" | 6 | #include "common/alignment.h" |
| 8 | #include "video_core/textures/bcn.h" | 7 | #include "video_core/textures/bcn.h" |
| 9 | #include "video_core/textures/workers.h" | 8 | #include "video_core/textures/workers.h" |
diff --git a/src/video_core/textures/bcn.h b/src/video_core/textures/bcn.h index 6464af885..d5d2a16c9 100644 --- a/src/video_core/textures/bcn.h +++ b/src/video_core/textures/bcn.h | |||
| @@ -4,14 +4,13 @@ | |||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <span> | 6 | #include <span> |
| 7 | #include <stdint.h> | 7 | |
| 8 | #include "common/common_types.h" | ||
| 8 | 9 | ||
| 9 | namespace Tegra::Texture::BCN { | 10 | namespace Tegra::Texture::BCN { |
| 10 | 11 | ||
| 11 | void CompressBC1(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, | 12 | void CompressBC1(std::span<const u8> data, u32 width, u32 height, u32 depth, std::span<u8> output); |
| 12 | std::span<uint8_t> output); | ||
| 13 | 13 | ||
| 14 | void CompressBC3(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, | 14 | void CompressBC3(std::span<const u8> data, u32 width, u32 height, u32 depth, std::span<u8> output); |
| 15 | std::span<uint8_t> output); | ||
| 16 | 15 | ||
| 17 | } // namespace Tegra::Texture::BCN | 16 | } // namespace Tegra::Texture::BCN |
diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.cpp b/src/video_core/vulkan_common/vulkan_debug_callback.cpp index 9de484c29..67e8065a4 100644 --- a/src/video_core/vulkan_common/vulkan_debug_callback.cpp +++ b/src/video_core/vulkan_common/vulkan_debug_callback.cpp | |||
| @@ -7,10 +7,10 @@ | |||
| 7 | 7 | ||
| 8 | namespace Vulkan { | 8 | namespace Vulkan { |
| 9 | namespace { | 9 | namespace { |
| 10 | VkBool32 Callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, | 10 | VkBool32 DebugUtilCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, |
| 11 | VkDebugUtilsMessageTypeFlagsEXT type, | 11 | VkDebugUtilsMessageTypeFlagsEXT type, |
| 12 | const VkDebugUtilsMessengerCallbackDataEXT* data, | 12 | const VkDebugUtilsMessengerCallbackDataEXT* data, |
| 13 | [[maybe_unused]] void* user_data) { | 13 | [[maybe_unused]] void* user_data) { |
| 14 | // Skip logging known false-positive validation errors | 14 | // Skip logging known false-positive validation errors |
| 15 | switch (static_cast<u32>(data->messageIdNumber)) { | 15 | switch (static_cast<u32>(data->messageIdNumber)) { |
| 16 | #ifdef ANDROID | 16 | #ifdef ANDROID |
| @@ -62,9 +62,26 @@ VkBool32 Callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, | |||
| 62 | } | 62 | } |
| 63 | return VK_FALSE; | 63 | return VK_FALSE; |
| 64 | } | 64 | } |
| 65 | |||
| 66 | VkBool32 DebugReportCallback(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objectType, | ||
| 67 | uint64_t object, size_t location, int32_t messageCode, | ||
| 68 | const char* pLayerPrefix, const char* pMessage, void* pUserData) { | ||
| 69 | const VkDebugReportFlagBitsEXT severity = static_cast<VkDebugReportFlagBitsEXT>(flags); | ||
| 70 | const std::string_view message{pMessage}; | ||
| 71 | if (severity & VK_DEBUG_REPORT_ERROR_BIT_EXT) { | ||
| 72 | LOG_CRITICAL(Render_Vulkan, "{}", message); | ||
| 73 | } else if (severity & VK_DEBUG_REPORT_WARNING_BIT_EXT) { | ||
| 74 | LOG_WARNING(Render_Vulkan, "{}", message); | ||
| 75 | } else if (severity & VK_DEBUG_REPORT_INFORMATION_BIT_EXT) { | ||
| 76 | LOG_INFO(Render_Vulkan, "{}", message); | ||
| 77 | } else if (severity & VK_DEBUG_REPORT_DEBUG_BIT_EXT) { | ||
| 78 | LOG_DEBUG(Render_Vulkan, "{}", message); | ||
| 79 | } | ||
| 80 | return VK_FALSE; | ||
| 81 | } | ||
| 65 | } // Anonymous namespace | 82 | } // Anonymous namespace |
| 66 | 83 | ||
| 67 | vk::DebugUtilsMessenger CreateDebugCallback(const vk::Instance& instance) { | 84 | vk::DebugUtilsMessenger CreateDebugUtilsCallback(const vk::Instance& instance) { |
| 68 | return instance.CreateDebugUtilsMessenger(VkDebugUtilsMessengerCreateInfoEXT{ | 85 | return instance.CreateDebugUtilsMessenger(VkDebugUtilsMessengerCreateInfoEXT{ |
| 69 | .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, | 86 | .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, |
| 70 | .pNext = nullptr, | 87 | .pNext = nullptr, |
| @@ -76,7 +93,18 @@ vk::DebugUtilsMessenger CreateDebugCallback(const vk::Instance& instance) { | |||
| 76 | .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | | 93 | .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | |
| 77 | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | | 94 | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | |
| 78 | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, | 95 | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, |
| 79 | .pfnUserCallback = Callback, | 96 | .pfnUserCallback = DebugUtilCallback, |
| 97 | .pUserData = nullptr, | ||
| 98 | }); | ||
| 99 | } | ||
| 100 | |||
| 101 | vk::DebugReportCallback CreateDebugReportCallback(const vk::Instance& instance) { | ||
| 102 | return instance.CreateDebugReportCallback({ | ||
| 103 | .sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT, | ||
| 104 | .pNext = nullptr, | ||
| 105 | .flags = VK_DEBUG_REPORT_DEBUG_BIT_EXT | VK_DEBUG_REPORT_INFORMATION_BIT_EXT | | ||
| 106 | VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT, | ||
| 107 | .pfnCallback = DebugReportCallback, | ||
| 80 | .pUserData = nullptr, | 108 | .pUserData = nullptr, |
| 81 | }); | 109 | }); |
| 82 | } | 110 | } |
diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.h b/src/video_core/vulkan_common/vulkan_debug_callback.h index 71b1f69ec..a8af7b406 100644 --- a/src/video_core/vulkan_common/vulkan_debug_callback.h +++ b/src/video_core/vulkan_common/vulkan_debug_callback.h | |||
| @@ -7,6 +7,8 @@ | |||
| 7 | 7 | ||
| 8 | namespace Vulkan { | 8 | namespace Vulkan { |
| 9 | 9 | ||
| 10 | vk::DebugUtilsMessenger CreateDebugCallback(const vk::Instance& instance); | 10 | vk::DebugUtilsMessenger CreateDebugUtilsCallback(const vk::Instance& instance); |
| 11 | |||
| 12 | vk::DebugReportCallback CreateDebugReportCallback(const vk::Instance& instance); | ||
| 11 | 13 | ||
| 12 | } // namespace Vulkan | 14 | } // namespace Vulkan |
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index e4ca65b58..70436cf1c 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -349,7 +349,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 349 | const bool is_s8gen2 = device_id == 0x43050a01; | 349 | const bool is_s8gen2 = device_id == 0x43050a01; |
| 350 | const bool is_arm = driver_id == VK_DRIVER_ID_ARM_PROPRIETARY; | 350 | const bool is_arm = driver_id == VK_DRIVER_ID_ARM_PROPRIETARY; |
| 351 | 351 | ||
| 352 | if ((is_mvk || is_qualcomm || is_turnip) && !is_suitable) { | 352 | if ((is_mvk || is_qualcomm || is_turnip || is_arm) && !is_suitable) { |
| 353 | LOG_WARNING(Render_Vulkan, "Unsuitable driver, continuing anyway"); | 353 | LOG_WARNING(Render_Vulkan, "Unsuitable driver, continuing anyway"); |
| 354 | } else if (!is_suitable) { | 354 | } else if (!is_suitable) { |
| 355 | throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); | 355 | throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); |
| @@ -905,6 +905,10 @@ bool Device::GetSuitability(bool requires_swapchain) { | |||
| 905 | properties.driver.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES; | 905 | properties.driver.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES; |
| 906 | SetNext(next, properties.driver); | 906 | SetNext(next, properties.driver); |
| 907 | 907 | ||
| 908 | // Retrieve subgroup properties. | ||
| 909 | properties.subgroup_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; | ||
| 910 | SetNext(next, properties.subgroup_properties); | ||
| 911 | |||
| 908 | // Retrieve relevant extension properties. | 912 | // Retrieve relevant extension properties. |
| 909 | if (extensions.shader_float_controls) { | 913 | if (extensions.shader_float_controls) { |
| 910 | properties.float_controls.sType = | 914 | properties.float_controls.sType = |
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index b84af3dfb..1f17265d5 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h | |||
| @@ -293,6 +293,11 @@ public: | |||
| 293 | return features.features.textureCompressionASTC_LDR; | 293 | return features.features.textureCompressionASTC_LDR; |
| 294 | } | 294 | } |
| 295 | 295 | ||
| 296 | /// Returns true if BCn is natively supported. | ||
| 297 | bool IsOptimalBcnSupported() const { | ||
| 298 | return features.features.textureCompressionBC; | ||
| 299 | } | ||
| 300 | |||
| 296 | /// Returns true if descriptor aliasing is natively supported. | 301 | /// Returns true if descriptor aliasing is natively supported. |
| 297 | bool IsDescriptorAliasingSupported() const { | 302 | bool IsDescriptorAliasingSupported() const { |
| 298 | return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY; | 303 | return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY; |
| @@ -323,6 +328,11 @@ public: | |||
| 323 | return properties.subgroup_size_control.requiredSubgroupSizeStages & stage; | 328 | return properties.subgroup_size_control.requiredSubgroupSizeStages & stage; |
| 324 | } | 329 | } |
| 325 | 330 | ||
| 331 | /// Returns true if the device supports the provided subgroup feature. | ||
| 332 | bool IsSubgroupFeatureSupported(VkSubgroupFeatureFlagBits feature) const { | ||
| 333 | return properties.subgroup_properties.supportedOperations & feature; | ||
| 334 | } | ||
| 335 | |||
| 326 | /// Returns the maximum number of push descriptors. | 336 | /// Returns the maximum number of push descriptors. |
| 327 | u32 MaxPushDescriptors() const { | 337 | u32 MaxPushDescriptors() const { |
| 328 | return properties.push_descriptor.maxPushDescriptors; | 338 | return properties.push_descriptor.maxPushDescriptors; |
| @@ -388,6 +398,11 @@ public: | |||
| 388 | return extensions.swapchain_mutable_format; | 398 | return extensions.swapchain_mutable_format; |
| 389 | } | 399 | } |
| 390 | 400 | ||
| 401 | /// Returns true if VK_KHR_shader_float_controls is enabled. | ||
| 402 | bool IsKhrShaderFloatControlsSupported() const { | ||
| 403 | return extensions.shader_float_controls; | ||
| 404 | } | ||
| 405 | |||
| 391 | /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. | 406 | /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. |
| 392 | bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { | 407 | bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { |
| 393 | return extensions.workgroup_memory_explicit_layout; | 408 | return extensions.workgroup_memory_explicit_layout; |
| @@ -413,6 +428,11 @@ public: | |||
| 413 | return extensions.sampler_filter_minmax; | 428 | return extensions.sampler_filter_minmax; |
| 414 | } | 429 | } |
| 415 | 430 | ||
| 431 | /// Returns true if the device supports VK_EXT_shader_stencil_export. | ||
| 432 | bool IsExtShaderStencilExportSupported() const { | ||
| 433 | return extensions.shader_stencil_export; | ||
| 434 | } | ||
| 435 | |||
| 416 | /// Returns true if the device supports VK_EXT_depth_range_unrestricted. | 436 | /// Returns true if the device supports VK_EXT_depth_range_unrestricted. |
| 417 | bool IsExtDepthRangeUnrestrictedSupported() const { | 437 | bool IsExtDepthRangeUnrestrictedSupported() const { |
| 418 | return extensions.depth_range_unrestricted; | 438 | return extensions.depth_range_unrestricted; |
| @@ -482,9 +502,9 @@ public: | |||
| 482 | return extensions.vertex_input_dynamic_state; | 502 | return extensions.vertex_input_dynamic_state; |
| 483 | } | 503 | } |
| 484 | 504 | ||
| 485 | /// Returns true if the device supports VK_EXT_shader_stencil_export. | 505 | /// Returns true if the device supports VK_EXT_shader_demote_to_helper_invocation |
| 486 | bool IsExtShaderStencilExportSupported() const { | 506 | bool IsExtShaderDemoteToHelperInvocationSupported() const { |
| 487 | return extensions.shader_stencil_export; | 507 | return extensions.shader_demote_to_helper_invocation; |
| 488 | } | 508 | } |
| 489 | 509 | ||
| 490 | /// Returns true if the device supports VK_EXT_conservative_rasterization. | 510 | /// Returns true if the device supports VK_EXT_conservative_rasterization. |
| @@ -518,12 +538,12 @@ public: | |||
| 518 | if (extensions.spirv_1_4) { | 538 | if (extensions.spirv_1_4) { |
| 519 | return 0x00010400U; | 539 | return 0x00010400U; |
| 520 | } | 540 | } |
| 521 | return 0x00010000U; | 541 | return 0x00010300U; |
| 522 | } | 542 | } |
| 523 | 543 | ||
| 524 | /// Returns true when a known debugging tool is attached. | 544 | /// Returns true when a known debugging tool is attached. |
| 525 | bool HasDebuggingToolAttached() const { | 545 | bool HasDebuggingToolAttached() const { |
| 526 | return has_renderdoc || has_nsight_graphics || Settings::values.renderer_debug.GetValue(); | 546 | return has_renderdoc || has_nsight_graphics; |
| 527 | } | 547 | } |
| 528 | 548 | ||
| 529 | /// @returns True if compute pipelines can cause crashing. | 549 | /// @returns True if compute pipelines can cause crashing. |
| @@ -588,6 +608,10 @@ public: | |||
| 588 | return properties.properties.limits.maxVertexInputBindings; | 608 | return properties.properties.limits.maxVertexInputBindings; |
| 589 | } | 609 | } |
| 590 | 610 | ||
| 611 | u32 GetMaxViewports() const { | ||
| 612 | return properties.properties.limits.maxViewports; | ||
| 613 | } | ||
| 614 | |||
| 591 | bool SupportsConditionalBarriers() const { | 615 | bool SupportsConditionalBarriers() const { |
| 592 | return supports_conditional_barriers; | 616 | return supports_conditional_barriers; |
| 593 | } | 617 | } |
| @@ -680,6 +704,7 @@ private: | |||
| 680 | 704 | ||
| 681 | struct Properties { | 705 | struct Properties { |
| 682 | VkPhysicalDeviceDriverProperties driver{}; | 706 | VkPhysicalDeviceDriverProperties driver{}; |
| 707 | VkPhysicalDeviceSubgroupProperties subgroup_properties{}; | ||
| 683 | VkPhysicalDeviceFloatControlsProperties float_controls{}; | 708 | VkPhysicalDeviceFloatControlsProperties float_controls{}; |
| 684 | VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor{}; | 709 | VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor{}; |
| 685 | VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control{}; | 710 | VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control{}; |
diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp index b6d83e446..7624a9b32 100644 --- a/src/video_core/vulkan_common/vulkan_instance.cpp +++ b/src/video_core/vulkan_common/vulkan_instance.cpp | |||
| @@ -31,10 +31,34 @@ | |||
| 31 | 31 | ||
| 32 | namespace Vulkan { | 32 | namespace Vulkan { |
| 33 | namespace { | 33 | namespace { |
| 34 | |||
| 35 | [[nodiscard]] bool AreExtensionsSupported(const vk::InstanceDispatch& dld, | ||
| 36 | std::span<const char* const> extensions) { | ||
| 37 | const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld); | ||
| 38 | if (!properties) { | ||
| 39 | LOG_ERROR(Render_Vulkan, "Failed to query extension properties"); | ||
| 40 | return false; | ||
| 41 | } | ||
| 42 | for (const char* extension : extensions) { | ||
| 43 | const auto it = std::ranges::find_if(*properties, [extension](const auto& prop) { | ||
| 44 | return std::strcmp(extension, prop.extensionName) == 0; | ||
| 45 | }); | ||
| 46 | if (it == properties->end()) { | ||
| 47 | LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension); | ||
| 48 | return false; | ||
| 49 | } | ||
| 50 | } | ||
| 51 | return true; | ||
| 52 | } | ||
| 53 | |||
| 34 | [[nodiscard]] std::vector<const char*> RequiredExtensions( | 54 | [[nodiscard]] std::vector<const char*> RequiredExtensions( |
| 35 | Core::Frontend::WindowSystemType window_type, bool enable_validation) { | 55 | const vk::InstanceDispatch& dld, Core::Frontend::WindowSystemType window_type, |
| 56 | bool enable_validation) { | ||
| 36 | std::vector<const char*> extensions; | 57 | std::vector<const char*> extensions; |
| 37 | extensions.reserve(6); | 58 | extensions.reserve(6); |
| 59 | #ifdef __APPLE__ | ||
| 60 | extensions.push_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); | ||
| 61 | #endif | ||
| 38 | switch (window_type) { | 62 | switch (window_type) { |
| 39 | case Core::Frontend::WindowSystemType::Headless: | 63 | case Core::Frontend::WindowSystemType::Headless: |
| 40 | break; | 64 | break; |
| @@ -66,35 +90,14 @@ namespace { | |||
| 66 | extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); | 90 | extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); |
| 67 | } | 91 | } |
| 68 | if (enable_validation) { | 92 | if (enable_validation) { |
| 69 | extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); | 93 | const bool debug_utils = |
| 94 | AreExtensionsSupported(dld, std::array{VK_EXT_DEBUG_UTILS_EXTENSION_NAME}); | ||
| 95 | extensions.push_back(debug_utils ? VK_EXT_DEBUG_UTILS_EXTENSION_NAME | ||
| 96 | : VK_EXT_DEBUG_REPORT_EXTENSION_NAME); | ||
| 70 | } | 97 | } |
| 71 | extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); | ||
| 72 | |||
| 73 | #ifdef __APPLE__ | ||
| 74 | extensions.push_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); | ||
| 75 | #endif | ||
| 76 | return extensions; | 98 | return extensions; |
| 77 | } | 99 | } |
| 78 | 100 | ||
| 79 | [[nodiscard]] bool AreExtensionsSupported(const vk::InstanceDispatch& dld, | ||
| 80 | std::span<const char* const> extensions) { | ||
| 81 | const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld); | ||
| 82 | if (!properties) { | ||
| 83 | LOG_ERROR(Render_Vulkan, "Failed to query extension properties"); | ||
| 84 | return false; | ||
| 85 | } | ||
| 86 | for (const char* extension : extensions) { | ||
| 87 | const auto it = std::ranges::find_if(*properties, [extension](const auto& prop) { | ||
| 88 | return std::strcmp(extension, prop.extensionName) == 0; | ||
| 89 | }); | ||
| 90 | if (it == properties->end()) { | ||
| 91 | LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension); | ||
| 92 | return false; | ||
| 93 | } | ||
| 94 | } | ||
| 95 | return true; | ||
| 96 | } | ||
| 97 | |||
| 98 | [[nodiscard]] std::vector<const char*> Layers(bool enable_validation) { | 101 | [[nodiscard]] std::vector<const char*> Layers(bool enable_validation) { |
| 99 | std::vector<const char*> layers; | 102 | std::vector<const char*> layers; |
| 100 | if (enable_validation) { | 103 | if (enable_validation) { |
| @@ -138,7 +141,8 @@ vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceD | |||
| 138 | LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers"); | 141 | LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers"); |
| 139 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); | 142 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); |
| 140 | } | 143 | } |
| 141 | const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_validation); | 144 | const std::vector<const char*> extensions = |
| 145 | RequiredExtensions(dld, window_type, enable_validation); | ||
| 142 | if (!AreExtensionsSupported(dld, extensions)) { | 146 | if (!AreExtensionsSupported(dld, extensions)) { |
| 143 | throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); | 147 | throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); |
| 144 | } | 148 | } |
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 28fcb21a0..2fa29793a 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp | |||
| @@ -259,7 +259,9 @@ bool Load(VkInstance instance, InstanceDispatch& dld) noexcept { | |||
| 259 | // These functions may fail to load depending on the enabled extensions. | 259 | // These functions may fail to load depending on the enabled extensions. |
| 260 | // Don't return a failure on these. | 260 | // Don't return a failure on these. |
| 261 | X(vkCreateDebugUtilsMessengerEXT); | 261 | X(vkCreateDebugUtilsMessengerEXT); |
| 262 | X(vkCreateDebugReportCallbackEXT); | ||
| 262 | X(vkDestroyDebugUtilsMessengerEXT); | 263 | X(vkDestroyDebugUtilsMessengerEXT); |
| 264 | X(vkDestroyDebugReportCallbackEXT); | ||
| 263 | X(vkDestroySurfaceKHR); | 265 | X(vkDestroySurfaceKHR); |
| 264 | X(vkGetPhysicalDeviceFeatures2); | 266 | X(vkGetPhysicalDeviceFeatures2); |
| 265 | X(vkGetPhysicalDeviceProperties2); | 267 | X(vkGetPhysicalDeviceProperties2); |
| @@ -481,6 +483,11 @@ void Destroy(VkInstance instance, VkDebugUtilsMessengerEXT handle, | |||
| 481 | dld.vkDestroyDebugUtilsMessengerEXT(instance, handle, nullptr); | 483 | dld.vkDestroyDebugUtilsMessengerEXT(instance, handle, nullptr); |
| 482 | } | 484 | } |
| 483 | 485 | ||
| 486 | void Destroy(VkInstance instance, VkDebugReportCallbackEXT handle, | ||
| 487 | const InstanceDispatch& dld) noexcept { | ||
| 488 | dld.vkDestroyDebugReportCallbackEXT(instance, handle, nullptr); | ||
| 489 | } | ||
| 490 | |||
| 484 | void Destroy(VkInstance instance, VkSurfaceKHR handle, const InstanceDispatch& dld) noexcept { | 491 | void Destroy(VkInstance instance, VkSurfaceKHR handle, const InstanceDispatch& dld) noexcept { |
| 485 | dld.vkDestroySurfaceKHR(instance, handle, nullptr); | 492 | dld.vkDestroySurfaceKHR(instance, handle, nullptr); |
| 486 | } | 493 | } |
| @@ -549,6 +556,13 @@ DebugUtilsMessenger Instance::CreateDebugUtilsMessenger( | |||
| 549 | return DebugUtilsMessenger(object, handle, *dld); | 556 | return DebugUtilsMessenger(object, handle, *dld); |
| 550 | } | 557 | } |
| 551 | 558 | ||
| 559 | DebugReportCallback Instance::CreateDebugReportCallback( | ||
| 560 | const VkDebugReportCallbackCreateInfoEXT& create_info) const { | ||
| 561 | VkDebugReportCallbackEXT object; | ||
| 562 | Check(dld->vkCreateDebugReportCallbackEXT(handle, &create_info, nullptr, &object)); | ||
| 563 | return DebugReportCallback(object, handle, *dld); | ||
| 564 | } | ||
| 565 | |||
| 552 | void Image::SetObjectNameEXT(const char* name) const { | 566 | void Image::SetObjectNameEXT(const char* name) const { |
| 553 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name); | 567 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name); |
| 554 | } | 568 | } |
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 44fce47a5..b5e70fcd4 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h | |||
| @@ -164,8 +164,10 @@ struct InstanceDispatch { | |||
| 164 | PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties{}; | 164 | PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties{}; |
| 165 | 165 | ||
| 166 | PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT{}; | 166 | PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT{}; |
| 167 | PFN_vkCreateDebugReportCallbackEXT vkCreateDebugReportCallbackEXT{}; | ||
| 167 | PFN_vkCreateDevice vkCreateDevice{}; | 168 | PFN_vkCreateDevice vkCreateDevice{}; |
| 168 | PFN_vkDestroyDebugUtilsMessengerEXT vkDestroyDebugUtilsMessengerEXT{}; | 169 | PFN_vkDestroyDebugUtilsMessengerEXT vkDestroyDebugUtilsMessengerEXT{}; |
| 170 | PFN_vkDestroyDebugReportCallbackEXT vkDestroyDebugReportCallbackEXT{}; | ||
| 169 | PFN_vkDestroyDevice vkDestroyDevice{}; | 171 | PFN_vkDestroyDevice vkDestroyDevice{}; |
| 170 | PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR{}; | 172 | PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR{}; |
| 171 | PFN_vkEnumerateDeviceExtensionProperties vkEnumerateDeviceExtensionProperties{}; | 173 | PFN_vkEnumerateDeviceExtensionProperties vkEnumerateDeviceExtensionProperties{}; |
| @@ -366,6 +368,7 @@ void Destroy(VkDevice, VkSwapchainKHR, const DeviceDispatch&) noexcept; | |||
| 366 | void Destroy(VkDevice, VkSemaphore, const DeviceDispatch&) noexcept; | 368 | void Destroy(VkDevice, VkSemaphore, const DeviceDispatch&) noexcept; |
| 367 | void Destroy(VkDevice, VkShaderModule, const DeviceDispatch&) noexcept; | 369 | void Destroy(VkDevice, VkShaderModule, const DeviceDispatch&) noexcept; |
| 368 | void Destroy(VkInstance, VkDebugUtilsMessengerEXT, const InstanceDispatch&) noexcept; | 370 | void Destroy(VkInstance, VkDebugUtilsMessengerEXT, const InstanceDispatch&) noexcept; |
| 371 | void Destroy(VkInstance, VkDebugReportCallbackEXT, const InstanceDispatch&) noexcept; | ||
| 369 | void Destroy(VkInstance, VkSurfaceKHR, const InstanceDispatch&) noexcept; | 372 | void Destroy(VkInstance, VkSurfaceKHR, const InstanceDispatch&) noexcept; |
| 370 | 373 | ||
| 371 | VkResult Free(VkDevice, VkDescriptorPool, Span<VkDescriptorSet>, const DeviceDispatch&) noexcept; | 374 | VkResult Free(VkDevice, VkDescriptorPool, Span<VkDescriptorSet>, const DeviceDispatch&) noexcept; |
| @@ -581,6 +584,7 @@ private: | |||
| 581 | }; | 584 | }; |
| 582 | 585 | ||
| 583 | using DebugUtilsMessenger = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>; | 586 | using DebugUtilsMessenger = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>; |
| 587 | using DebugReportCallback = Handle<VkDebugReportCallbackEXT, VkInstance, InstanceDispatch>; | ||
| 584 | using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>; | 588 | using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>; |
| 585 | using DescriptorUpdateTemplate = Handle<VkDescriptorUpdateTemplate, VkDevice, DeviceDispatch>; | 589 | using DescriptorUpdateTemplate = Handle<VkDescriptorUpdateTemplate, VkDevice, DeviceDispatch>; |
| 586 | using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>; | 590 | using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>; |
| @@ -613,6 +617,11 @@ public: | |||
| 613 | DebugUtilsMessenger CreateDebugUtilsMessenger( | 617 | DebugUtilsMessenger CreateDebugUtilsMessenger( |
| 614 | const VkDebugUtilsMessengerCreateInfoEXT& create_info) const; | 618 | const VkDebugUtilsMessengerCreateInfoEXT& create_info) const; |
| 615 | 619 | ||
| 620 | /// Creates a debug report callback. | ||
| 621 | /// @throw Exception on creation failure. | ||
| 622 | DebugReportCallback CreateDebugReportCallback( | ||
| 623 | const VkDebugReportCallbackCreateInfoEXT& create_info) const; | ||
| 624 | |||
| 616 | /// Returns dispatch table. | 625 | /// Returns dispatch table. |
| 617 | const InstanceDispatch& Dispatch() const noexcept { | 626 | const InstanceDispatch& Dispatch() const noexcept { |
| 618 | return *dld; | 627 | return *dld; |