summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--externals/CMakeLists.txt4
-rw-r--r--externals/stb/stb_dxt.cpp765
-rw-r--r--externals/stb/stb_dxt.h36
-rw-r--r--src/common/settings.cpp2
-rw-r--r--src/common/settings.h9
-rw-r--r--src/video_core/CMakeLists.txt6
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h2
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp31
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp24
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp8
-rw-r--r--src/video_core/texture_cache/util.cpp77
-rw-r--r--src/video_core/textures/astc.cpp5
-rw-r--r--src/video_core/textures/bcn.cpp87
-rw-r--r--src/video_core/textures/bcn.h17
-rw-r--r--src/video_core/textures/workers.cpp15
-rw-r--r--src/video_core/textures/workers.h12
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp6
-rw-r--r--src/yuzu/configuration/config.cpp5
-rw-r--r--src/yuzu/configuration/config.h1
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.cpp14
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.ui44
-rw-r--r--src/yuzu_cmd/config.cpp1
-rw-r--r--src/yuzu_cmd/default_ini.h4
23 files changed, 1149 insertions, 26 deletions
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index f2a560f04..e59eeb489 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -139,3 +139,7 @@ if (NOT TARGET LLVM::Demangle)
139 target_sources(demangle PRIVATE demangle/ItaniumDemangle.cpp) 139 target_sources(demangle PRIVATE demangle/ItaniumDemangle.cpp)
140 add_library(LLVM::Demangle ALIAS demangle) 140 add_library(LLVM::Demangle ALIAS demangle)
141endif() 141endif()
142
# Static library for the vendored stb sources; the only translation unit compiled is
# stb/stb_dxt.cpp, and ./stb is exported as a public include directory.
143add_library(stb STATIC)
144target_include_directories(stb PUBLIC ./stb)
145target_sources(stb PRIVATE stb/stb_dxt.cpp)
diff --git a/externals/stb/stb_dxt.cpp b/externals/stb/stb_dxt.cpp
new file mode 100644
index 000000000..64f1f3d03
--- /dev/null
+++ b/externals/stb/stb_dxt.cpp
@@ -0,0 +1,765 @@
1// SPDX-FileCopyrightText: fabian "ryg" giesen
2// SPDX-License-Identifier: MIT
3
4// stb_dxt.h - v1.12 - DXT1/DXT5 compressor
5
6#include <stb_dxt.h>
7
8#include <stdlib.h>
9#include <string.h>
10
11#if !defined(STBD_FABS)
12#include <math.h>
13#endif
14
15#ifndef STBD_FABS
16#define STBD_FABS(x) fabs(x)
17#endif
18
// Single-colour endpoint table for the 5-bit (red/blue) channels.
// Indexed by an 8-bit channel value; entry [v][0] is packed into max16 and entry [v][1]
// into min16 by the single-colour paths of stb__RefineBlock and stb__CompressColorBlock.
19static const unsigned char stb__OMatch5[256][2] = {
20 {0, 0}, {0, 0}, {0, 1}, {0, 1}, {1, 0}, {1, 0}, {1, 0}, {1, 1}, {1, 1},
21 {1, 1}, {1, 2}, {0, 4}, {2, 1}, {2, 1}, {2, 1}, {2, 2}, {2, 2}, {2, 2},
22 {2, 3}, {1, 5}, {3, 2}, {3, 2}, {4, 0}, {3, 3}, {3, 3}, {3, 3}, {3, 4},
23 {3, 4}, {3, 4}, {3, 5}, {4, 3}, {4, 3}, {5, 2}, {4, 4}, {4, 4}, {4, 5},
24 {4, 5}, {5, 4}, {5, 4}, {5, 4}, {6, 3}, {5, 5}, {5, 5}, {5, 6}, {4, 8},
25 {6, 5}, {6, 5}, {6, 5}, {6, 6}, {6, 6}, {6, 6}, {6, 7}, {5, 9}, {7, 6},
26 {7, 6}, {8, 4}, {7, 7}, {7, 7}, {7, 7}, {7, 8}, {7, 8}, {7, 8}, {7, 9},
27 {8, 7}, {8, 7}, {9, 6}, {8, 8}, {8, 8}, {8, 9}, {8, 9}, {9, 8}, {9, 8},
28 {9, 8}, {10, 7}, {9, 9}, {9, 9}, {9, 10}, {8, 12}, {10, 9}, {10, 9}, {10, 9},
29 {10, 10}, {10, 10}, {10, 10}, {10, 11}, {9, 13}, {11, 10}, {11, 10}, {12, 8}, {11, 11},
30 {11, 11}, {11, 11}, {11, 12}, {11, 12}, {11, 12}, {11, 13}, {12, 11}, {12, 11}, {13, 10},
31 {12, 12}, {12, 12}, {12, 13}, {12, 13}, {13, 12}, {13, 12}, {13, 12}, {14, 11}, {13, 13},
32 {13, 13}, {13, 14}, {12, 16}, {14, 13}, {14, 13}, {14, 13}, {14, 14}, {14, 14}, {14, 14},
33 {14, 15}, {13, 17}, {15, 14}, {15, 14}, {16, 12}, {15, 15}, {15, 15}, {15, 15}, {15, 16},
34 {15, 16}, {15, 16}, {15, 17}, {16, 15}, {16, 15}, {17, 14}, {16, 16}, {16, 16}, {16, 17},
35 {16, 17}, {17, 16}, {17, 16}, {17, 16}, {18, 15}, {17, 17}, {17, 17}, {17, 18}, {16, 20},
36 {18, 17}, {18, 17}, {18, 17}, {18, 18}, {18, 18}, {18, 18}, {18, 19}, {17, 21}, {19, 18},
37 {19, 18}, {20, 16}, {19, 19}, {19, 19}, {19, 19}, {19, 20}, {19, 20}, {19, 20}, {19, 21},
38 {20, 19}, {20, 19}, {21, 18}, {20, 20}, {20, 20}, {20, 21}, {20, 21}, {21, 20}, {21, 20},
39 {21, 20}, {22, 19}, {21, 21}, {21, 21}, {21, 22}, {20, 24}, {22, 21}, {22, 21}, {22, 21},
40 {22, 22}, {22, 22}, {22, 22}, {22, 23}, {21, 25}, {23, 22}, {23, 22}, {24, 20}, {23, 23},
41 {23, 23}, {23, 23}, {23, 24}, {23, 24}, {23, 24}, {23, 25}, {24, 23}, {24, 23}, {25, 22},
42 {24, 24}, {24, 24}, {24, 25}, {24, 25}, {25, 24}, {25, 24}, {25, 24}, {26, 23}, {25, 25},
43 {25, 25}, {25, 26}, {24, 28}, {26, 25}, {26, 25}, {26, 25}, {26, 26}, {26, 26}, {26, 26},
44 {26, 27}, {25, 29}, {27, 26}, {27, 26}, {28, 24}, {27, 27}, {27, 27}, {27, 27}, {27, 28},
45 {27, 28}, {27, 28}, {27, 29}, {28, 27}, {28, 27}, {29, 26}, {28, 28}, {28, 28}, {28, 29},
46 {28, 29}, {29, 28}, {29, 28}, {29, 28}, {30, 27}, {29, 29}, {29, 29}, {29, 30}, {29, 30},
47 {30, 29}, {30, 29}, {30, 29}, {30, 30}, {30, 30}, {30, 30}, {30, 31}, {30, 31}, {31, 30},
48 {31, 30}, {31, 30}, {31, 31}, {31, 31},
49};
// Single-colour endpoint table for the 6-bit (green) channel.
// Indexed by an 8-bit channel value; entry [v][0] is packed into max16 and entry [v][1]
// into min16 by the single-colour paths of stb__RefineBlock and stb__CompressColorBlock.
50static const unsigned char stb__OMatch6[256][2] = {
51 {0, 0}, {0, 1}, {1, 0}, {1, 1}, {1, 1}, {1, 2}, {2, 1}, {2, 2}, {2, 2},
52 {2, 3}, {3, 2}, {3, 3}, {3, 3}, {3, 4}, {4, 3}, {4, 4}, {4, 4}, {4, 5},
53 {5, 4}, {5, 5}, {5, 5}, {5, 6}, {6, 5}, {6, 6}, {6, 6}, {6, 7}, {7, 6},
54 {7, 7}, {7, 7}, {7, 8}, {8, 7}, {8, 8}, {8, 8}, {8, 9}, {9, 8}, {9, 9},
55 {9, 9}, {9, 10}, {10, 9}, {10, 10}, {10, 10}, {10, 11}, {11, 10}, {8, 16}, {11, 11},
56 {11, 12}, {12, 11}, {9, 17}, {12, 12}, {12, 13}, {13, 12}, {11, 16}, {13, 13}, {13, 14},
57 {14, 13}, {12, 17}, {14, 14}, {14, 15}, {15, 14}, {14, 16}, {15, 15}, {15, 16}, {16, 14},
58 {16, 15}, {17, 14}, {16, 16}, {16, 17}, {17, 16}, {18, 15}, {17, 17}, {17, 18}, {18, 17},
59 {20, 14}, {18, 18}, {18, 19}, {19, 18}, {21, 15}, {19, 19}, {19, 20}, {20, 19}, {20, 20},
60 {20, 20}, {20, 21}, {21, 20}, {21, 21}, {21, 21}, {21, 22}, {22, 21}, {22, 22}, {22, 22},
61 {22, 23}, {23, 22}, {23, 23}, {23, 23}, {23, 24}, {24, 23}, {24, 24}, {24, 24}, {24, 25},
62 {25, 24}, {25, 25}, {25, 25}, {25, 26}, {26, 25}, {26, 26}, {26, 26}, {26, 27}, {27, 26},
63 {24, 32}, {27, 27}, {27, 28}, {28, 27}, {25, 33}, {28, 28}, {28, 29}, {29, 28}, {27, 32},
64 {29, 29}, {29, 30}, {30, 29}, {28, 33}, {30, 30}, {30, 31}, {31, 30}, {30, 32}, {31, 31},
65 {31, 32}, {32, 30}, {32, 31}, {33, 30}, {32, 32}, {32, 33}, {33, 32}, {34, 31}, {33, 33},
66 {33, 34}, {34, 33}, {36, 30}, {34, 34}, {34, 35}, {35, 34}, {37, 31}, {35, 35}, {35, 36},
67 {36, 35}, {36, 36}, {36, 36}, {36, 37}, {37, 36}, {37, 37}, {37, 37}, {37, 38}, {38, 37},
68 {38, 38}, {38, 38}, {38, 39}, {39, 38}, {39, 39}, {39, 39}, {39, 40}, {40, 39}, {40, 40},
69 {40, 40}, {40, 41}, {41, 40}, {41, 41}, {41, 41}, {41, 42}, {42, 41}, {42, 42}, {42, 42},
70 {42, 43}, {43, 42}, {40, 48}, {43, 43}, {43, 44}, {44, 43}, {41, 49}, {44, 44}, {44, 45},
71 {45, 44}, {43, 48}, {45, 45}, {45, 46}, {46, 45}, {44, 49}, {46, 46}, {46, 47}, {47, 46},
72 {46, 48}, {47, 47}, {47, 48}, {48, 46}, {48, 47}, {49, 46}, {48, 48}, {48, 49}, {49, 48},
73 {50, 47}, {49, 49}, {49, 50}, {50, 49}, {52, 46}, {50, 50}, {50, 51}, {51, 50}, {53, 47},
74 {51, 51}, {51, 52}, {52, 51}, {52, 52}, {52, 52}, {52, 53}, {53, 52}, {53, 53}, {53, 53},
75 {53, 54}, {54, 53}, {54, 54}, {54, 54}, {54, 55}, {55, 54}, {55, 55}, {55, 55}, {55, 56},
76 {56, 55}, {56, 56}, {56, 56}, {56, 57}, {57, 56}, {57, 57}, {57, 57}, {57, 58}, {58, 57},
77 {58, 58}, {58, 58}, {58, 59}, {59, 58}, {59, 59}, {59, 59}, {59, 60}, {60, 59}, {60, 60},
78 {60, 60}, {60, 61}, {61, 60}, {61, 61}, {61, 61}, {61, 62}, {62, 61}, {62, 62}, {62, 62},
79 {62, 63}, {63, 62}, {63, 63}, {63, 63},
80};
81
// Fixed-point product of two values: computes (a * b) / 255 with rounding,
// using shifts and adds instead of a division.
static int stb__Mul8Bit(int a, int b) {
    const int biased = a * b + 128;
    const int folded = biased + (biased >> 8);
    return folded >> 8;
}
86
// Unpacks a 5:6:5 colour into four bytes: R, G, B widened to 8 bits by replicating
// the top bits into the low bits, followed by a zero in out[3].
static void stb__From16Bit(unsigned char* out, unsigned short v) {
    const int red5 = (v >> 11) & 0x1f;
    const int green6 = (v >> 5) & 0x3f;
    const int blue5 = v & 0x1f;

    out[0] = static_cast<unsigned char>((red5 << 3) | (red5 >> 2));
    out[1] = static_cast<unsigned char>((green6 << 2) | (green6 >> 4));
    out[2] = static_cast<unsigned char>((blue5 << 3) | (blue5 >> 2));
    out[3] = 0;
}
98
99static unsigned short stb__As16Bit(int r, int g, int b) {
100 return (unsigned short)((stb__Mul8Bit(r, 31) << 11) + (stb__Mul8Bit(g, 63) << 5) +
101 stb__Mul8Bit(b, 31));
102}
103
// Interpolates one third of the way from a towards b.
// The rounding behaviour is selected at compile time by STB_DXT_USE_ROUNDING_BIAS.
static int stb__Lerp13(int a, int b) {
#ifdef STB_DXT_USE_ROUNDING_BIAS
    // with rounding bias
    const int delta = b - a;
    return a + stb__Mul8Bit(delta, 0x55);
#else
    // without rounding bias
    // replace "/ 3" by "* 0xaaab) >> 17" if your compiler sucks or you really
    // need every ounce of speed.
    const int weighted = 2 * a + b;
    return weighted / 3;
#endif
}
117
// Midpoint of a and b (integer division, so the result truncates).
static int stb__Lerp12(int a, int b) {
    const int sum = a + b;
    return sum / 2;
}
122
123// lerp RGB color
124static void stb__Lerp13RGB(unsigned char* out, unsigned char* p1, unsigned char* p2) {
125 out[0] = (unsigned char)stb__Lerp13(p1[0], p2[0]);
126 out[1] = (unsigned char)stb__Lerp13(p1[1], p2[1]);
127 out[2] = (unsigned char)stb__Lerp13(p1[2], p2[2]);
128}
129
130static void stb__Lerp12RGB(unsigned char* out, unsigned char* p1, unsigned char* p2) {
131 out[0] = (unsigned char)stb__Lerp12(p1[0], p2[0]);
132 out[1] = (unsigned char)stb__Lerp12(p1[1], p2[1]);
133 out[2] = (unsigned char)stb__Lerp12(p1[2], p2[2]);
134}
135
136/****************************************************************************/
137
138static void stb__Eval4Colors(unsigned char* color, unsigned short c0, unsigned short c1) {
139 stb__From16Bit(color + 0, c0);
140 stb__From16Bit(color + 4, c1);
141 stb__Lerp13RGB(color + 8, color + 0, color + 4);
142 stb__Lerp13RGB(color + 12, color + 4, color + 0);
143}
144
145static void stb__Eval3Colors(unsigned char* color, unsigned short c0, unsigned short c1) {
146 stb__From16Bit(color + 0, c0);
147 stb__From16Bit(color + 4, c1);
148 stb__Lerp12RGB(color + 8, color + 0, color + 4);
149}
150
// The color matching function.
// For each of the 16 RGBA pixels in `block`, picks one of the four palette entries in
// `color` (4 bytes each) and returns the 2-bit indices packed into 32 bits, with pixel 0
// in the lowest two bits.
static unsigned int stb__MatchColorsBlock(unsigned char* block, unsigned char* color) {
    // Axis pointing from palette entry 1 towards palette entry 0.
    const int axis_r = color[0] - color[4];
    const int axis_g = color[1] - color[5];
    const int axis_b = color[2] - color[6];

    // Projection of every palette entry onto that axis.
    int projected[4];
    for (int entry = 0; entry < 4; ++entry) {
        const unsigned char* rgb = color + entry * 4;
        projected[entry] = rgb[0] * axis_r + rgb[1] * axis_g + rgb[2] * axis_b;
    }

    // think of the colors as arranged on a line; project point onto that line,
    // then choose next color out of available ones. we compute the crossover
    // points for "best color in top half"/"best in bottom half" and then the same
    // inside that subinterval.
    //
    // relying on this 1d approximation isn't always optimal in terms of euclidean
    // distance, but it's very close and a lot faster.
    // http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html
    //
    // The split points are sums of two neighbouring projections (i.e. twice the midpoint),
    // which is why each pixel projection is doubled before comparing.
    const int low_split = projected[1] + projected[3];
    const int mid_split = projected[3] + projected[2];
    const int high_split = projected[2] + projected[0];

    unsigned int mask = 0;
    for (int px = 15; px >= 0; --px) {
        const unsigned char* texel = block + px * 4;
        const int doubled = (texel[0] * axis_r + texel[1] * axis_g + texel[2] * axis_b) * 2;

        unsigned int index;
        if (doubled < mid_split) {
            index = (doubled < low_split) ? 1u : 3u;
        } else {
            index = (doubled < high_split) ? 2u : 0u;
        }
        mask = (mask << 2) | index;
    }

    return mask;
}
193
// Colour matching for the three-colour + transparent palette.
//
// `color` holds three palette entries of 4 bytes each: entry 0 and entry 1 are the
// endpoints and entry 2 is their midpoint. Every pixel of `block` (16 RGBA pixels) gets
// a 2-bit index, packed with pixel 0 in the lowest two bits of the result:
//   3 - the pixel's alpha byte is 0
//   0 - nearest to entry 0
//   1 - nearest to entry 1
//   2 - nearest to the midpoint entry
//
// "Nearest" is decided on the 1D projection onto the axis (entry 0 - entry 1), exactly as
// in stb__MatchColorsBlock. Entry 1 has the smallest projection and entry 0 the largest,
// so projections below the entry1/midpoint split must select index 1. The previous code
// selected index 0 there, mapping pixels next to entry 1 onto the opposite endpoint.
static unsigned int stb__MatchColorsAlphaBlock(unsigned char* block, unsigned char* color) {
    unsigned int mask = 0;
    int dirr = color[0 * 4 + 0] - color[1 * 4 + 0];
    int dirg = color[0 * 4 + 1] - color[1 * 4 + 1];
    int dirb = color[0 * 4 + 2] - color[1 * 4 + 2];
    int dots[16];
    int stops[3];
    int i;
    int c1Point, c0Point;

    for (i = 0; i < 16; i++)
        dots[i] = block[i * 4 + 0] * dirr + block[i * 4 + 1] * dirg + block[i * 4 + 2] * dirb;

    for (i = 0; i < 3; i++)
        stops[i] = color[i * 4 + 0] * dirr + color[i * 4 + 1] * dirg + color[i * 4 + 2] * dirb;

    // Both split points are twice the midpoint between neighbouring palette projections,
    // hence the doubled pixel projection below. c1Point <= c0Point always holds.
    c1Point = (stops[1] + stops[2]); // below this: entry 1
    c0Point = (stops[2] + stops[0]); // at or above this: entry 0

    for (i = 15; i >= 0; i--) {
        int dot = dots[i] * 2;
        mask <<= 2;

        if (block[i * 4 + 3] == 0)
            mask |= 3;
        else if (dot < c1Point)
            mask |= 1;
        else if (dot < c0Point)
            mask |= 2;
        else
            mask |= 0;
    }

    return mask;
}
227
// Orders the endpoint pair so that *pmax16 <= *pmin16 afterwards, swapping the two values
// when necessary. Note this is the reverse of what the parameter names suggest.
static void stb__ReorderColors(unsigned short* pmax16, unsigned short* pmin16) {
    const unsigned short first = *pmax16;
    const unsigned short second = *pmin16;
    if (second >= first) {
        return;
    }
    *pmax16 = second;
    *pmin16 = first;
}
235
// Ensures *pmax16 >= *pmin16. When the endpoints have to be swapped, the low bit of every
// 2-bit index in *pmask is flipped as well (0 <-> 1, 2 <-> 3) so each pixel keeps
// referring to the same colour.
static void stb__FinalizeColors(unsigned short* pmax16, unsigned short* pmin16,
                                unsigned int* pmask) {
    const unsigned short first = *pmax16;
    const unsigned short second = *pmin16;
    if (first >= second) {
        return;
    }
    *pmax16 = second;
    *pmin16 = first;
    *pmask ^= 0x55555555;
}
245
246// The color optimization function. (Clever code, part 1)
247static void stb__OptimizeColorsBlock(unsigned char* block, unsigned short* pmax16,
248 unsigned short* pmin16) {
249 int mind, maxd;
250 unsigned char *minp, *maxp;
251 double magn;
252 int v_r, v_g, v_b;
253 static const int nIterPower = 4;
254 float covf[6], vfr, vfg, vfb;
255
256 // determine color distribution
257 int cov[6];
258 int mu[3], min[3], max[3];
259 int ch, i, iter;
260
261 for (ch = 0; ch < 3; ch++) {
262 const unsigned char* bp = ((const unsigned char*)block) + ch;
263 int muv, minv, maxv;
264
265 muv = minv = maxv = bp[0];
266 for (i = 4; i < 64; i += 4) {
267 muv += bp[i];
268 if (bp[i] < minv)
269 minv = bp[i];
270 else if (bp[i] > maxv)
271 maxv = bp[i];
272 }
273
274 mu[ch] = (muv + 8) >> 4;
275 min[ch] = minv;
276 max[ch] = maxv;
277 }
278
279 // determine covariance matrix
280 for (i = 0; i < 6; i++)
281 cov[i] = 0;
282
283 for (i = 0; i < 16; i++) {
284 int r = block[i * 4 + 0] - mu[0];
285 int g = block[i * 4 + 1] - mu[1];
286 int b = block[i * 4 + 2] - mu[2];
287
288 cov[0] += r * r;
289 cov[1] += r * g;
290 cov[2] += r * b;
291 cov[3] += g * g;
292 cov[4] += g * b;
293 cov[5] += b * b;
294 }
295
296 // convert covariance matrix to float, find principal axis via power iter
297 for (i = 0; i < 6; i++)
298 covf[i] = static_cast<float>(cov[i]) / 255.0f;
299
300 vfr = (float)(max[0] - min[0]);
301 vfg = (float)(max[1] - min[1]);
302 vfb = (float)(max[2] - min[2]);
303
304 for (iter = 0; iter < nIterPower; iter++) {
305 float r = vfr * covf[0] + vfg * covf[1] + vfb * covf[2];
306 float g = vfr * covf[1] + vfg * covf[3] + vfb * covf[4];
307 float b = vfr * covf[2] + vfg * covf[4] + vfb * covf[5];
308
309 vfr = r;
310 vfg = g;
311 vfb = b;
312 }
313
314 magn = STBD_FABS(vfr);
315 if (STBD_FABS(vfg) > magn)
316 magn = STBD_FABS(vfg);
317 if (STBD_FABS(vfb) > magn)
318 magn = STBD_FABS(vfb);
319
320 if (magn < 4.0f) { // too small, default to luminance
321 v_r = 299; // JPEG YCbCr luma coefs, scaled by 1000.
322 v_g = 587;
323 v_b = 114;
324 } else {
325 magn = 512.0 / magn;
326 v_r = (int)(vfr * magn);
327 v_g = (int)(vfg * magn);
328 v_b = (int)(vfb * magn);
329 }
330
331 minp = maxp = block;
332 mind = maxd = block[0] * v_r + block[1] * v_g + block[2] * v_b;
333 // Pick colors at extreme points
334 for (i = 1; i < 16; i++) {
335 int dot = block[i * 4 + 0] * v_r + block[i * 4 + 1] * v_g + block[i * 4 + 2] * v_b;
336
337 if (dot < mind) {
338 mind = dot;
339 minp = block + i * 4;
340 }
341
342 if (dot > maxd) {
343 maxd = dot;
344 maxp = block + i * 4;
345 }
346 }
347
348 *pmax16 = stb__As16Bit(maxp[0], maxp[1], maxp[2]);
349 *pmin16 = stb__As16Bit(minp[0], minp[1], minp[2]);
350 stb__ReorderColors(pmax16, pmin16);
351}
352
353static void stb__OptimizeColorsAlphaBlock(unsigned char* block, unsigned short* pmax16,
354 unsigned short* pmin16) {
355 int mind, maxd;
356 unsigned char *minp, *maxp;
357 double magn;
358 int v_r, v_g, v_b;
359 static const int nIterPower = 4;
360 float covf[6], vfr, vfg, vfb;
361
362 // determine color distribution
363 int cov[6];
364 int mu[3], min[3], max[3];
365 int ch, i, iter;
366
367 for (ch = 0; ch < 3; ch++) {
368 const unsigned char* bp = ((const unsigned char*)block) + ch;
369 int muv = 0, minv = 256, maxv = -1;
370 int num = 0;
371
372 for (i = 0; i < 64; i += 4) {
373 if (bp[3 - ch] == 0) {
374 continue;
375 }
376
377 muv += bp[i];
378 if (bp[i] < minv)
379 minv = bp[i];
380 else if (bp[i] > maxv)
381 maxv = bp[i];
382
383 num++;
384 }
385
386 mu[ch] = num > 0 ? (muv + 8) / num : 0;
387 min[ch] = minv;
388 max[ch] = maxv;
389 }
390
391 // determine covariance matrix
392 for (i = 0; i < 6; i++)
393 cov[i] = 0;
394
395 for (i = 0; i < 16; i++) {
396 if (block[i * 4 + 3] == 0) {
397 continue;
398 }
399
400 int r = block[i * 4 + 0] - mu[0];
401 int g = block[i * 4 + 1] - mu[1];
402 int b = block[i * 4 + 2] - mu[2];
403
404 cov[0] += r * r;
405 cov[1] += r * g;
406 cov[2] += r * b;
407 cov[3] += g * g;
408 cov[4] += g * b;
409 cov[5] += b * b;
410 }
411
412 // convert covariance matrix to float, find principal axis via power iter
413 for (i = 0; i < 6; i++)
414 covf[i] = static_cast<float>(cov[i]) / 255.0f;
415
416 vfr = (float)(max[0] - min[0]);
417 vfg = (float)(max[1] - min[1]);
418 vfb = (float)(max[2] - min[2]);
419
420 for (iter = 0; iter < nIterPower; iter++) {
421 float r = vfr * covf[0] + vfg * covf[1] + vfb * covf[2];
422 float g = vfr * covf[1] + vfg * covf[3] + vfb * covf[4];
423 float b = vfr * covf[2] + vfg * covf[4] + vfb * covf[5];
424
425 vfr = r;
426 vfg = g;
427 vfb = b;
428 }
429
430 magn = STBD_FABS(vfr);
431 if (STBD_FABS(vfg) > magn)
432 magn = STBD_FABS(vfg);
433 if (STBD_FABS(vfb) > magn)
434 magn = STBD_FABS(vfb);
435
436 if (magn < 4.0f) { // too small, default to luminance
437 v_r = 299; // JPEG YCbCr luma coefs, scaled by 1000.
438 v_g = 587;
439 v_b = 114;
440 } else {
441 magn = 512.0 / magn;
442 v_r = (int)(vfr * magn);
443 v_g = (int)(vfg * magn);
444 v_b = (int)(vfb * magn);
445 }
446
447 minp = maxp = NULL;
448 mind = 0x7fffffff;
449 maxd = -0x80000000;
450
451 // Pick colors at extreme points
452 for (i = 0; i < 16; i++) {
453 if (block[i * 4 + 3] == 0) {
454 continue;
455 }
456
457 int dot = block[i * 4 + 0] * v_r + block[i * 4 + 1] * v_g + block[i * 4 + 2] * v_b;
458
459 if (dot < mind) {
460 mind = dot;
461 minp = block + i * 4;
462 }
463
464 if (dot > maxd) {
465 maxd = dot;
466 maxp = block + i * 4;
467 }
468 }
469
470 if (!maxp) {
471 // all alpha, no color
472 *pmin16 = 0xffff;
473 *pmax16 = 0;
474 } else {
475 // endpoint colors found
476 *pmax16 = stb__As16Bit(maxp[0], maxp[1], maxp[2]);
477 *pmin16 = stb__As16Bit(minp[0], minp[1], minp[2]);
478
479 if (*pmax16 == *pmin16) {
480 // modify the endpoints to indicate presence of an alpha block
481 if (*pmax16 > 0) {
482 (*pmax16)--;
483 } else {
484 (*pmin16)++;
485 }
486 }
487
488 stb__ReorderColors(pmax16, pmin16);
489 }
490}
491
// Rounding thresholds for stb__Quantize5: after truncating x * 31 to q, the result is
// bumped to q + 1 when x is greater than stb__midpoints5[q].
492static const float stb__midpoints5[32] = {
493 0.015686f, 0.047059f, 0.078431f, 0.111765f, 0.145098f, 0.176471f, 0.207843f, 0.241176f,
494 0.274510f, 0.305882f, 0.337255f, 0.370588f, 0.403922f, 0.435294f, 0.466667f, 0.5f,
495 0.533333f, 0.564706f, 0.596078f, 0.629412f, 0.662745f, 0.694118f, 0.725490f, 0.758824f,
496 0.792157f, 0.823529f, 0.854902f, 0.888235f, 0.921569f, 0.952941f, 0.984314f, 1.0f};
497
// Rounding thresholds for stb__Quantize6: after truncating x * 63 to q, the result is
// bumped to q + 1 when x is greater than stb__midpoints6[q].
498static const float stb__midpoints6[64] = {
499 0.007843f, 0.023529f, 0.039216f, 0.054902f, 0.070588f, 0.086275f, 0.101961f, 0.117647f,
500 0.133333f, 0.149020f, 0.164706f, 0.180392f, 0.196078f, 0.211765f, 0.227451f, 0.245098f,
501 0.262745f, 0.278431f, 0.294118f, 0.309804f, 0.325490f, 0.341176f, 0.356863f, 0.372549f,
502 0.388235f, 0.403922f, 0.419608f, 0.435294f, 0.450980f, 0.466667f, 0.482353f, 0.500000f,
503 0.517647f, 0.533333f, 0.549020f, 0.564706f, 0.580392f, 0.596078f, 0.611765f, 0.627451f,
504 0.643137f, 0.658824f, 0.674510f, 0.690196f, 0.705882f, 0.721569f, 0.737255f, 0.754902f,
505 0.772549f, 0.788235f, 0.803922f, 0.819608f, 0.835294f, 0.850980f, 0.866667f, 0.882353f,
506 0.898039f, 0.913725f, 0.929412f, 0.945098f, 0.960784f, 0.976471f, 0.992157f, 1.0f};
507
508static unsigned short stb__Quantize5(float x) {
509 unsigned short q;
510 x = x < 0 ? 0 : x > 1 ? 1 : x; // saturate
511 q = (unsigned short)(x * 31);
512 q += (x > stb__midpoints5[q]);
513 return q;
514}
515
516static unsigned short stb__Quantize6(float x) {
517 unsigned short q;
518 x = x < 0 ? 0 : x > 1 ? 1 : x; // saturate
519 q = (unsigned short)(x * 63);
520 q += (x > stb__midpoints6[q]);
521 return q;
522}
523
524// The refinement function. (Clever code, part 2)
525// Tries to optimize colors to suit block contents better.
526// (By solving a least squares system via normal equations+Cramer's rule)
527static int stb__RefineBlock(unsigned char* block, unsigned short* pmax16, unsigned short* pmin16,
528 unsigned int mask) {
529 static const int w1Tab[4] = {3, 0, 2, 1};
530 static const int prods[4] = {0x090000, 0x000900, 0x040102, 0x010402};
531 // ^some magic to save a lot of multiplies in the accumulating loop...
532 // (precomputed products of weights for least squares system, accumulated
533 // inside one 32-bit register)
534
535 float f;
536 unsigned short oldMin, oldMax, min16, max16;
537 int i, akku = 0, xx, xy, yy;
538 int At1_r, At1_g, At1_b;
539 int At2_r, At2_g, At2_b;
540 unsigned int cm = mask;
541
542 oldMin = *pmin16;
543 oldMax = *pmax16;
544
545 if ((mask ^ (mask << 2)) < 4) // all pixels have the same index?
546 {
547 // yes, linear system would be singular; solve using optimal
548 // single-color match on average color
549 int r = 8, g = 8, b = 8;
550 for (i = 0; i < 16; ++i) {
551 r += block[i * 4 + 0];
552 g += block[i * 4 + 1];
553 b += block[i * 4 + 2];
554 }
555
556 r >>= 4;
557 g >>= 4;
558 b >>= 4;
559
560 max16 = static_cast<unsigned short>((stb__OMatch5[r][0] << 11) | (stb__OMatch6[g][0] << 5) |
561 stb__OMatch5[b][0]);
562 min16 = static_cast<unsigned short>((stb__OMatch5[r][1] << 11) | (stb__OMatch6[g][1] << 5) |
563 stb__OMatch5[b][1]);
564 } else {
565 At1_r = At1_g = At1_b = 0;
566 At2_r = At2_g = At2_b = 0;
567 for (i = 0; i < 16; ++i, cm >>= 2) {
568 int step = cm & 3;
569 int w1 = w1Tab[step];
570 int r = block[i * 4 + 0];
571 int g = block[i * 4 + 1];
572 int b = block[i * 4 + 2];
573
574 akku += prods[step];
575 At1_r += w1 * r;
576 At1_g += w1 * g;
577 At1_b += w1 * b;
578 At2_r += r;
579 At2_g += g;
580 At2_b += b;
581 }
582
583 At2_r = 3 * At2_r - At1_r;
584 At2_g = 3 * At2_g - At1_g;
585 At2_b = 3 * At2_b - At1_b;
586
587 // extract solutions and decide solvability
588 xx = akku >> 16;
589 yy = (akku >> 8) & 0xff;
590 xy = (akku >> 0) & 0xff;
591
592 f = 3.0f / 255.0f / static_cast<float>(xx * yy - xy * xy);
593
594 max16 = static_cast<unsigned short>(
595 stb__Quantize5(static_cast<float>(At1_r * yy - At2_r * xy) * f) << 11);
596 max16 |= static_cast<unsigned short>(
597 stb__Quantize6(static_cast<float>(At1_g * yy - At2_g * xy) * f) << 5);
598 max16 |= static_cast<unsigned short>(
599 stb__Quantize5(static_cast<float>(At1_b * yy - At2_b * xy) * f) << 0);
600
601 min16 = static_cast<unsigned short>(
602 stb__Quantize5(static_cast<float>(At2_r * xx - At1_r * xy) * f) << 11);
603 min16 |= static_cast<unsigned short>(
604 stb__Quantize6(static_cast<float>(At2_g * xx - At1_g * xy) * f) << 5);
605 min16 |= static_cast<unsigned short>(
606 stb__Quantize5(static_cast<float>(At2_b * xx - At1_b * xy) * f) << 0);
607 }
608
609 *pmin16 = min16;
610 *pmax16 = max16;
611 stb__ReorderColors(pmax16, pmin16);
612
613 return oldMin != min16 || oldMax != max16;
614}
615
616// Color block compression
617static void stb__CompressColorBlock(unsigned char* dest, unsigned char* block, int alpha,
618 int mode) {
619 unsigned int mask;
620 int i;
621 int refinecount;
622 unsigned short max16, min16;
623 unsigned char color[4 * 4];
624
625 refinecount = (mode & STB_DXT_HIGHQUAL) ? 2 : 1;
626
627 // check if block is constant
628 for (i = 1; i < 16; i++)
629 if (((unsigned int*)block)[i] != ((unsigned int*)block)[0])
630 break;
631
632 if (i == 16 && block[3] == 0 && alpha) { // constant alpha
633 mask = 0xffffffff;
634 max16 = 0;
635 min16 = 0xffff;
636 } else if (i == 16) { // constant color
637 int r = block[0], g = block[1], b = block[2];
638 mask = 0xaaaaaaaa;
639 max16 = static_cast<unsigned short>((stb__OMatch5[r][0] << 11) | (stb__OMatch6[g][0] << 5) |
640 stb__OMatch5[b][0]);
641 min16 = static_cast<unsigned short>((stb__OMatch5[r][1] << 11) | (stb__OMatch6[g][1] << 5) |
642 stb__OMatch5[b][1]);
643 } else if (alpha) {
644 stb__OptimizeColorsAlphaBlock(block, &max16, &min16);
645 stb__Eval3Colors(color, max16, min16);
646 mask = stb__MatchColorsAlphaBlock(block, color);
647 } else {
648 // first step: PCA+map along principal axis
649 stb__OptimizeColorsBlock(block, &max16, &min16);
650 if (max16 != min16) {
651 stb__Eval4Colors(color, max16, min16);
652 mask = stb__MatchColorsBlock(block, color);
653 } else
654 mask = 0;
655
656 // third step: refine (multiple times if requested)
657 for (i = 0; i < refinecount; i++) {
658 unsigned int lastmask = mask;
659
660 if (stb__RefineBlock(block, &max16, &min16, mask)) {
661 if (max16 != min16) {
662 stb__Eval4Colors(color, max16, min16);
663 mask = stb__MatchColorsBlock(block, color);
664 } else {
665 mask = 0;
666 break;
667 }
668 }
669
670 if (mask == lastmask)
671 break;
672 }
673 }
674
675 // write the color block
676 if (!alpha)
677 stb__FinalizeColors(&max16, &min16, &mask);
678
679 dest[0] = (unsigned char)(max16);
680 dest[1] = (unsigned char)(max16 >> 8);
681 dest[2] = (unsigned char)(min16);
682 dest[3] = (unsigned char)(min16 >> 8);
683 dest[4] = (unsigned char)(mask);
684 dest[5] = (unsigned char)(mask >> 8);
685 dest[6] = (unsigned char)(mask >> 16);
686 dest[7] = (unsigned char)(mask >> 24);
687}
688
// Alpha block compression (this is easy for a change)
//
// Reads 16 values from `src`, `stride` bytes apart, and writes 8 bytes to `dest`:
// the maximum value, the minimum value, then sixteen 3-bit indices packed LSB-first.
static void stb__CompressAlphaBlock(unsigned char* dest, unsigned char* src, int stride) {
    // find min/max color
    int lowest = src[0];
    int highest = src[0];
    for (int px = 1; px < 16; ++px) {
        const int value = src[px * stride];
        if (value < lowest) {
            lowest = value;
        } else if (value > highest) {
            highest = value;
        }
    }

    // encode them
    dest[0] = static_cast<unsigned char>(highest);
    dest[1] = static_cast<unsigned char>(lowest);
    unsigned char* out = dest + 2;

    // determine bias and emit color indices
    // given the choice of mx/mn, these indices are optimal:
    // http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/
    const int range = highest - lowest;
    const int range4 = range * 4;
    const int range2 = range * 2;
    const int bias = ((range < 8) ? (range - 1) : (range / 2 + 2)) - lowest * 7;

    int pending = 0;      // bits waiting to be flushed
    int pending_bits = 0; // how many of them are valid
    for (int px = 0; px < 16; ++px) {
        int scaled = src[px * stride] * 7 + bias;

        // select index. this is a "linear scale" lerp factor between 0 (val=min)
        // and 7 (val=max), built one binary digit at a time.
        int level = 0;
        if (scaled >= range4) {
            level = 4;
            scaled -= range4;
        }
        if (scaled >= range2) {
            level += 2;
            scaled -= range2;
        }
        if (scaled >= range) {
            level += 1;
        }

        // turn linear scale into DXT index (0/1 are extremal pts)
        int index = -level & 7;
        if (index < 2) {
            index ^= 1;
        }

        // write index
        pending |= index << pending_bits;
        pending_bits += 3;
        if (pending_bits >= 8) {
            *out++ = static_cast<unsigned char>(pending);
            pending >>= 8;
            pending_bits -= 8;
        }
    }
}
746
747void stb_compress_bc1_block(unsigned char* dest, const unsigned char* src, int alpha, int mode) {
748 stb__CompressColorBlock(dest, (unsigned char*)src, alpha, mode);
749}
750
751void stb_compress_bc3_block(unsigned char* dest, const unsigned char* src, int mode) {
752 unsigned char data[16][4];
753 int i;
754
755 stb__CompressAlphaBlock(dest, (unsigned char*)src + 3, 4);
756 dest += 8;
757 // make a new copy of the data in which alpha is opaque,
758 // because code uses a fast test for color constancy
759 memcpy(data, src, 4 * 16);
760 for (i = 0; i < 16; ++i)
761 data[i][3] = 255;
762 src = &data[0][0];
763
764 stb__CompressColorBlock(dest, (unsigned char*)src, 0, mode);
765}
diff --git a/externals/stb/stb_dxt.h b/externals/stb/stb_dxt.h
new file mode 100644
index 000000000..07d1d1de4
--- /dev/null
+++ b/externals/stb/stb_dxt.h
@@ -0,0 +1,36 @@
1// SPDX-FileCopyrightText: fabian "ryg" giesen
2// SPDX-License-Identifier: MIT
3
4// stb_dxt.h - v1.12 - DXT1/DXT5 compressor
5
6#ifndef STB_INCLUDE_STB_DXT_H
7#define STB_INCLUDE_STB_DXT_H
8
9#ifdef __cplusplus
10extern "C" {
11#endif
12
13#ifdef STB_DXT_STATIC
14#define STBDDEF static
15#else
16#define STBDDEF extern
17#endif
18
19// compression mode (bitflags)
20#define STB_DXT_NORMAL 0
21#define STB_DXT_DITHER 1 // use dithering. was always dubious, now deprecated. does nothing!
22#define STB_DXT_HIGHQUAL \
23 2 // high quality mode, does two refinement steps instead of 1. ~30-40% slower.
24
// Compresses one block of 16 pixels (4 bytes per pixel, 64 bytes read) into 8 bytes at dest.
// alpha: when non-zero, pixels whose alpha byte is 0 are encoded with index 3.
// mode:  STB_DXT_* bitflags; the implementation only inspects STB_DXT_HIGHQUAL.
25STBDDEF void stb_compress_bc1_block(unsigned char* dest,
26 const unsigned char* src_rgba_four_bytes_per_pixel, int alpha,
27 int mode);
28
// Compresses one block of 16 pixels (4 bytes per pixel, 64 bytes read) into 16 bytes at
// dest: an 8-byte alpha block followed by an 8-byte colour block.
// mode: STB_DXT_* bitflags, as for stb_compress_bc1_block.
29STBDDEF void stb_compress_bc3_block(unsigned char* dest, const unsigned char* src, int mode);
30
31#define STB_COMPRESS_DXT_BLOCK
32
33#ifdef __cplusplus
34}
35#endif
36#endif // STB_INCLUDE_STB_DXT_H
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index ba617aea1..ff53e80bb 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -61,6 +61,7 @@ void LogSettings() {
61 log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue()); 61 log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue());
62 log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue()); 62 log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue());
63 log_setting("Renderer_AsyncASTC", values.async_astc.GetValue()); 63 log_setting("Renderer_AsyncASTC", values.async_astc.GetValue());
64 log_setting("Renderer_AstcRecompression", values.astc_recompression.GetValue());
64 log_setting("Renderer_UseVsync", values.vsync_mode.GetValue()); 65 log_setting("Renderer_UseVsync", values.vsync_mode.GetValue());
65 log_setting("Renderer_UseReactiveFlushing", values.use_reactive_flushing.GetValue()); 66 log_setting("Renderer_UseReactiveFlushing", values.use_reactive_flushing.GetValue());
66 log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue()); 67 log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue());
@@ -224,6 +225,7 @@ void RestoreGlobalState(bool is_powered_on) {
224 values.nvdec_emulation.SetGlobal(true); 225 values.nvdec_emulation.SetGlobal(true);
225 values.accelerate_astc.SetGlobal(true); 226 values.accelerate_astc.SetGlobal(true);
226 values.async_astc.SetGlobal(true); 227 values.async_astc.SetGlobal(true);
228 values.astc_recompression.SetGlobal(true);
227 values.use_reactive_flushing.SetGlobal(true); 229 values.use_reactive_flushing.SetGlobal(true);
228 values.shader_backend.SetGlobal(true); 230 values.shader_backend.SetGlobal(true);
229 values.use_asynchronous_shaders.SetGlobal(true); 231 values.use_asynchronous_shaders.SetGlobal(true);
diff --git a/src/common/settings.h b/src/common/settings.h
index 36ffcd693..7f865b2a7 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -90,6 +90,12 @@ enum class AntiAliasing : u32 {
90 LastAA = Smaa, 90 LastAA = Smaa,
91}; 91};
92 92
// Values accepted by the `astc_recompression` setting (declared with the range
// Uncompressed..Bc3 and a default of Uncompressed).
// NOTE(review): presumably selects the format decoded ASTC textures are re-encoded to;
// confirm against the texture cache code.
93enum class AstcRecompression : u32 {
94 Uncompressed = 0,
95 Bc1 = 1,
96 Bc3 = 2,
97};
98
93struct ResolutionScalingInfo { 99struct ResolutionScalingInfo {
94 u32 up_scale{1}; 100 u32 up_scale{1};
95 u32 down_shift{0}; 101 u32 down_shift{0};
@@ -473,6 +479,9 @@ struct Values {
473 SwitchableSetting<bool> use_vulkan_driver_pipeline_cache{true, 479 SwitchableSetting<bool> use_vulkan_driver_pipeline_cache{true,
474 "use_vulkan_driver_pipeline_cache"}; 480 "use_vulkan_driver_pipeline_cache"};
475 SwitchableSetting<bool> enable_compute_pipelines{false, "enable_compute_pipelines"}; 481 SwitchableSetting<bool> enable_compute_pipelines{false, "enable_compute_pipelines"};
482 SwitchableSetting<AstcRecompression, true> astc_recompression{
483 AstcRecompression::Uncompressed, AstcRecompression::Uncompressed, AstcRecompression::Bc3,
484 "astc_recompression"};
476 485
477 SwitchableSetting<u8> bg_red{0, "bg_red"}; 486 SwitchableSetting<u8> bg_red{0, "bg_red"};
478 SwitchableSetting<u8> bg_green{0, "bg_green"}; 487 SwitchableSetting<u8> bg_green{0, "bg_green"};
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index a0009a36f..308d013d6 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -246,10 +246,14 @@ add_library(video_core STATIC
246 texture_cache/util.h 246 texture_cache/util.h
247 textures/astc.h 247 textures/astc.h
248 textures/astc.cpp 248 textures/astc.cpp
249 textures/bcn.cpp
250 textures/bcn.h
249 textures/decoders.cpp 251 textures/decoders.cpp
250 textures/decoders.h 252 textures/decoders.h
251 textures/texture.cpp 253 textures/texture.cpp
252 textures/texture.h 254 textures/texture.h
255 textures/workers.cpp
256 textures/workers.h
253 transform_feedback.cpp 257 transform_feedback.cpp
254 transform_feedback.h 258 transform_feedback.h
255 video_core.cpp 259 video_core.cpp
@@ -275,7 +279,7 @@ add_library(video_core STATIC
275create_target_directory_groups(video_core) 279create_target_directory_groups(video_core)
276 280
277target_link_libraries(video_core PUBLIC common core) 281target_link_libraries(video_core PUBLIC common core)
278target_link_libraries(video_core PUBLIC glad shader_recompiler) 282target_link_libraries(video_core PUBLIC glad shader_recompiler stb)
279 283
280if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) 284if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32)
281 add_dependencies(video_core ffmpeg-build) 285 add_dependencies(video_core ffmpeg-build)
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 98756e4da..f2508fbf0 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -1664,7 +1664,7 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
1664 // cbufs, which do not store the sizes adjacent to the addresses, so use the fully 1664 // cbufs, which do not store the sizes adjacent to the addresses, so use the fully
1665 // mapped buffer size for now. 1665 // mapped buffer size for now.
1666 const u32 memory_layout_size = static_cast<u32>(gpu_memory->GetMemoryLayoutSize(gpu_addr)); 1666 const u32 memory_layout_size = static_cast<u32>(gpu_memory->GetMemoryLayoutSize(gpu_addr));
1667 return memory_layout_size; 1667 return std::min(memory_layout_size, static_cast<u32>(8_MiB));
1668 }(); 1668 }();
1669 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1669 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1670 if (!cpu_addr || size == 0) { 1670 if (!cpu_addr || size == 0) {
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 31118886f..1e0823836 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -233,6 +233,8 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
233 const VideoCommon::ImageInfo& info) { 233 const VideoCommon::ImageInfo& info) {
234 if (IsPixelFormatASTC(info.format) && info.size.depth == 1 && !runtime.HasNativeASTC()) { 234 if (IsPixelFormatASTC(info.format) && info.size.depth == 1 && !runtime.HasNativeASTC()) {
235 return Settings::values.accelerate_astc.GetValue() && 235 return Settings::values.accelerate_astc.GetValue() &&
236 Settings::values.astc_recompression.GetValue() ==
237 Settings::AstcRecompression::Uncompressed &&
236 !Settings::values.async_astc.GetValue(); 238 !Settings::values.async_astc.GetValue();
237 } 239 }
238 // Disable other accelerated uploads for now as they don't implement swizzled uploads 240 // Disable other accelerated uploads for now as they don't implement swizzled uploads
@@ -437,6 +439,19 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form
437 return GL_R32UI; 439 return GL_R32UI;
438} 440}
439 441
442[[nodiscard]] GLenum SelectAstcFormat(PixelFormat format, bool is_srgb) {
443 switch (Settings::values.astc_recompression.GetValue()) {
444 case Settings::AstcRecompression::Bc1:
445 return is_srgb ? GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT : GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
446 break;
447 case Settings::AstcRecompression::Bc3:
448 return is_srgb ? GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT : GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
449 break;
450 default:
451 return is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
452 }
453}
454
440} // Anonymous namespace 455} // Anonymous namespace
441 456
442ImageBufferMap::~ImageBufferMap() { 457ImageBufferMap::~ImageBufferMap() {
@@ -739,9 +754,16 @@ Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_,
739 if (IsConverted(runtime->device, info.format, info.type)) { 754 if (IsConverted(runtime->device, info.format, info.type)) {
740 flags |= ImageFlagBits::Converted; 755 flags |= ImageFlagBits::Converted;
741 flags |= ImageFlagBits::CostlyLoad; 756 flags |= ImageFlagBits::CostlyLoad;
742 gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; 757
758 const bool is_srgb = IsPixelFormatSRGB(info.format);
759 gl_internal_format = is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
743 gl_format = GL_RGBA; 760 gl_format = GL_RGBA;
744 gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; 761 gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
762
763 if (IsPixelFormatASTC(info.format)) {
764 gl_internal_format = SelectAstcFormat(info.format, is_srgb);
765 gl_format = GL_NONE;
766 }
745 } else { 767 } else {
746 const auto& tuple = MaxwellToGL::GetFormatTuple(info.format); 768 const auto& tuple = MaxwellToGL::GetFormatTuple(info.format);
747 gl_internal_format = tuple.internal_format; 769 gl_internal_format = tuple.internal_format;
@@ -1130,7 +1152,12 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
1130 views{runtime.null_image_views} { 1152 views{runtime.null_image_views} {
1131 const Device& device = runtime.device; 1153 const Device& device = runtime.device;
1132 if (True(image.flags & ImageFlagBits::Converted)) { 1154 if (True(image.flags & ImageFlagBits::Converted)) {
1133 internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; 1155 const bool is_srgb = IsPixelFormatSRGB(info.format);
1156 internal_format = is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
1157
1158 if (IsPixelFormatASTC(info.format)) {
1159 internal_format = SelectAstcFormat(info.format, is_srgb);
1160 }
1134 } else { 1161 } else {
1135 internal_format = MaxwellToGL::GetFormatTuple(format).internal_format; 1162 internal_format = MaxwellToGL::GetFormatTuple(format).internal_format;
1136 } 1163 }
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 8853cf0f7..b75d7220d 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -6,6 +6,7 @@
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "common/settings.h"
9#include "video_core/engines/maxwell_3d.h" 10#include "video_core/engines/maxwell_3d.h"
10#include "video_core/renderer_vulkan/maxwell_to_vk.h" 11#include "video_core/renderer_vulkan/maxwell_to_vk.h"
11#include "video_core/surface.h" 12#include "video_core/surface.h"
@@ -237,14 +238,25 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with
237 PixelFormat pixel_format) { 238 PixelFormat pixel_format) {
238 ASSERT(static_cast<size_t>(pixel_format) < std::size(tex_format_tuples)); 239 ASSERT(static_cast<size_t>(pixel_format) < std::size(tex_format_tuples));
239 FormatTuple tuple = tex_format_tuples[static_cast<size_t>(pixel_format)]; 240 FormatTuple tuple = tex_format_tuples[static_cast<size_t>(pixel_format)];
240 // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively 241 // Transcode on hardware that doesn't support ASTC natively
241 if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { 242 if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
242 const bool is_srgb = with_srgb && VideoCore::Surface::IsPixelFormatSRGB(pixel_format); 243 const bool is_srgb = with_srgb && VideoCore::Surface::IsPixelFormatSRGB(pixel_format);
243 if (is_srgb) { 244
244 tuple.format = VK_FORMAT_A8B8G8R8_SRGB_PACK32; 245 switch (Settings::values.astc_recompression.GetValue()) {
245 } else { 246 case Settings::AstcRecompression::Uncompressed:
246 tuple.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32; 247 if (is_srgb) {
247 tuple.usage |= Storage; 248 tuple.format = VK_FORMAT_A8B8G8R8_SRGB_PACK32;
249 } else {
250 tuple.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32;
251 tuple.usage |= Storage;
252 }
253 break;
254 case Settings::AstcRecompression::Bc1:
255 tuple.format = is_srgb ? VK_FORMAT_BC1_RGBA_SRGB_BLOCK : VK_FORMAT_BC1_RGBA_UNORM_BLOCK;
256 break;
257 case Settings::AstcRecompression::Bc3:
258 tuple.format = is_srgb ? VK_FORMAT_BC3_SRGB_BLOCK : VK_FORMAT_BC3_UNORM_BLOCK;
259 break;
248 } 260 }
249 } 261 }
250 const bool attachable = (tuple.usage & Attachable) != 0; 262 const bool attachable = (tuple.usage & Attachable) != 0;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 4d0481f2a..77d72697e 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1268,7 +1268,9 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
1268 if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { 1268 if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
1269 if (Settings::values.async_astc.GetValue()) { 1269 if (Settings::values.async_astc.GetValue()) {
1270 flags |= VideoCommon::ImageFlagBits::AsynchronousDecode; 1270 flags |= VideoCommon::ImageFlagBits::AsynchronousDecode;
1271 } else if (Settings::values.accelerate_astc.GetValue() && info.size.depth == 1) { 1271 } else if (Settings::values.astc_recompression.GetValue() ==
1272 Settings::AstcRecompression::Uncompressed &&
1273 Settings::values.accelerate_astc.GetValue() && info.size.depth == 1) {
1272 flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; 1274 flags |= VideoCommon::ImageFlagBits::AcceleratedUpload;
1273 } 1275 }
1274 flags |= VideoCommon::ImageFlagBits::Converted; 1276 flags |= VideoCommon::ImageFlagBits::Converted;
@@ -1283,7 +1285,9 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
1283 .usage = VK_IMAGE_USAGE_STORAGE_BIT, 1285 .usage = VK_IMAGE_USAGE_STORAGE_BIT,
1284 }; 1286 };
1285 current_image = *original_image; 1287 current_image = *original_image;
1286 if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { 1288 if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported() &&
1289 Settings::values.astc_recompression.GetValue() ==
1290 Settings::AstcRecompression::Uncompressed) {
1287 const auto& device = runtime->device.GetLogical(); 1291 const auto& device = runtime->device.GetLogical();
1288 storage_image_views.reserve(info.resources.levels); 1292 storage_image_views.reserve(info.resources.levels);
1289 for (s32 level = 0; level < info.resources.levels; ++level) { 1293 for (s32 level = 0; level < info.resources.levels; ++level) {
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index f1071aa23..1463f157b 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -18,6 +18,8 @@
18#include "common/bit_util.h" 18#include "common/bit_util.h"
19#include "common/common_types.h" 19#include "common/common_types.h"
20#include "common/div_ceil.h" 20#include "common/div_ceil.h"
21#include "common/scratch_buffer.h"
22#include "common/settings.h"
21#include "video_core/compatible_formats.h" 23#include "video_core/compatible_formats.h"
22#include "video_core/engines/maxwell_3d.h" 24#include "video_core/engines/maxwell_3d.h"
23#include "video_core/memory_manager.h" 25#include "video_core/memory_manager.h"
@@ -28,6 +30,7 @@
28#include "video_core/texture_cache/samples_helper.h" 30#include "video_core/texture_cache/samples_helper.h"
29#include "video_core/texture_cache/util.h" 31#include "video_core/texture_cache/util.h"
30#include "video_core/textures/astc.h" 32#include "video_core/textures/astc.h"
33#include "video_core/textures/bcn.h"
31#include "video_core/textures/decoders.h" 34#include "video_core/textures/decoders.h"
32 35
33namespace VideoCommon { 36namespace VideoCommon {
@@ -585,6 +588,21 @@ u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept {
585 return info.size.width * BytesPerBlock(info.format); 588 return info.size.width * BytesPerBlock(info.format);
586 } 589 }
587 static constexpr Extent2D TILE_SIZE{1, 1}; 590 static constexpr Extent2D TILE_SIZE{1, 1};
591 if (IsPixelFormatASTC(info.format) && Settings::values.astc_recompression.GetValue() !=
592 Settings::AstcRecompression::Uncompressed) {
593 const u32 bpp_div =
594 Settings::values.astc_recompression.GetValue() == Settings::AstcRecompression::Bc1 ? 2
595 : 1;
596 // NumBlocksPerLayer doesn't account for this correctly, so we have to do it manually.
597 u32 output_size = 0;
598 for (s32 i = 0; i < info.resources.levels; i++) {
599 const auto mip_size = AdjustMipSize(info.size, i);
600 const u32 plane_dim =
601 Common::AlignUp(mip_size.width, 4U) * Common::AlignUp(mip_size.height, 4U);
602 output_size += (plane_dim * info.size.depth * info.resources.layers) / bpp_div;
603 }
604 return output_size;
605 }
588 return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK; 606 return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK;
589} 607}
590 608
@@ -885,6 +903,7 @@ BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
885void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, 903void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
886 std::span<BufferImageCopy> copies) { 904 std::span<BufferImageCopy> copies) {
887 u32 output_offset = 0; 905 u32 output_offset = 0;
906 Common::ScratchBuffer<u8> decode_scratch;
888 907
889 const Extent2D tile_size = DefaultBlockSize(info.format); 908 const Extent2D tile_size = DefaultBlockSize(info.format);
890 for (BufferImageCopy& copy : copies) { 909 for (BufferImageCopy& copy : copies) {
@@ -895,22 +914,58 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
895 ASSERT(copy.image_extent == mip_size); 914 ASSERT(copy.image_extent == mip_size);
896 ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width)); 915 ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width));
897 ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height)); 916 ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height));
898 if (IsPixelFormatASTC(info.format)) { 917
918 const auto input_offset = input.subspan(copy.buffer_offset);
919 copy.buffer_offset = output_offset;
920 copy.buffer_row_length = mip_size.width;
921 copy.buffer_image_height = mip_size.height;
922
923 const auto recompression_setting = Settings::values.astc_recompression.GetValue();
924 const bool astc = IsPixelFormatASTC(info.format);
925
926 if (astc && recompression_setting == Settings::AstcRecompression::Uncompressed) {
899 Tegra::Texture::ASTC::Decompress( 927 Tegra::Texture::ASTC::Decompress(
900 input.subspan(copy.buffer_offset), copy.image_extent.width, 928 input_offset, copy.image_extent.width, copy.image_extent.height,
901 copy.image_extent.height,
902 copy.image_subresource.num_layers * copy.image_extent.depth, tile_size.width, 929 copy.image_subresource.num_layers * copy.image_extent.depth, tile_size.width,
903 tile_size.height, output.subspan(output_offset)); 930 tile_size.height, output.subspan(output_offset));
931
932 output_offset += copy.image_extent.width * copy.image_extent.height *
933 copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
934 } else if (astc) {
935 // BC1 uses 0.5 bytes per texel
936 // BC3 uses 1 byte per texel
937 const auto compress = recompression_setting == Settings::AstcRecompression::Bc1
938 ? Tegra::Texture::BCN::CompressBC1
939 : Tegra::Texture::BCN::CompressBC3;
940 const auto bpp_div = recompression_setting == Settings::AstcRecompression::Bc1 ? 2 : 1;
941
942 const u32 plane_dim = copy.image_extent.width * copy.image_extent.height;
943 const u32 level_size = plane_dim * copy.image_extent.depth *
944 copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
945 decode_scratch.resize_destructive(level_size);
946
947 Tegra::Texture::ASTC::Decompress(
948 input_offset, copy.image_extent.width, copy.image_extent.height,
949 copy.image_subresource.num_layers * copy.image_extent.depth, tile_size.width,
950 tile_size.height, decode_scratch);
951
952 compress(decode_scratch, copy.image_extent.width, copy.image_extent.height,
953 copy.image_subresource.num_layers * copy.image_extent.depth,
954 output.subspan(output_offset));
955
956 const u32 aligned_plane_dim = Common::AlignUp(copy.image_extent.width, 4) *
957 Common::AlignUp(copy.image_extent.height, 4);
958
959 copy.buffer_size =
960 (aligned_plane_dim * copy.image_extent.depth * copy.image_subresource.num_layers) /
961 bpp_div;
962 output_offset += static_cast<u32>(copy.buffer_size);
904 } else { 963 } else {
905 DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent, 964 DecompressBC4(input_offset, copy.image_extent, output.subspan(output_offset));
906 output.subspan(output_offset));
907 }
908 copy.buffer_offset = output_offset;
909 copy.buffer_row_length = mip_size.width;
910 copy.buffer_image_height = mip_size.height;
911 965
912 output_offset += copy.image_extent.width * copy.image_extent.height * 966 output_offset += copy.image_extent.width * copy.image_extent.height *
913 copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK; 967 copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
968 }
914 } 969 }
915} 970}
916 971
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index a68bc0d77..fef0be31d 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -16,8 +16,8 @@
16#include "common/alignment.h" 16#include "common/alignment.h"
17#include "common/common_types.h" 17#include "common/common_types.h"
18#include "common/polyfill_ranges.h" 18#include "common/polyfill_ranges.h"
19#include "common/thread_worker.h"
20#include "video_core/textures/astc.h" 19#include "video_core/textures/astc.h"
20#include "video_core/textures/workers.h"
21 21
22class InputBitStream { 22class InputBitStream {
23public: 23public:
@@ -1656,8 +1656,7 @@ void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height,
1656 const u32 rows = Common::DivideUp(height, block_height); 1656 const u32 rows = Common::DivideUp(height, block_height);
1657 const u32 cols = Common::DivideUp(width, block_width); 1657 const u32 cols = Common::DivideUp(width, block_width);
1658 1658
1659 static Common::ThreadWorker workers{std::max(std::thread::hardware_concurrency(), 2U) / 2, 1659 Common::ThreadWorker& workers{GetThreadWorkers()};
1660 "ASTCDecompress"};
1661 1660
1662 for (u32 z = 0; z < depth; ++z) { 1661 for (u32 z = 0; z < depth; ++z) {
1663 const u32 depth_offset = z * height * width * 4; 1662 const u32 depth_offset = z * height * width * 4;
diff --git a/src/video_core/textures/bcn.cpp b/src/video_core/textures/bcn.cpp
new file mode 100644
index 000000000..671212a49
--- /dev/null
+++ b/src/video_core/textures/bcn.cpp
@@ -0,0 +1,87 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <stb_dxt.h>
5#include <string.h>
6
7#include "common/alignment.h"
8#include "video_core/textures/bcn.h"
9#include "video_core/textures/workers.h"
10
11namespace Tegra::Texture::BCN {
12
// Signature of a per-block encoder: reads one gathered 4x4 block of 4-byte texels from
// block_input and writes the encoded block to block_output. any_alpha is set by the caller when
// alpha thresholding zeroed at least one texel of the block.
13using BCNCompressor = void(u8* block_output, const u8* block_input, bool any_alpha);
14
15template <u32 BytesPerBlock, bool ThresholdAlpha = false>
16void CompressBCN(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
17 std::span<uint8_t> output, BCNCompressor f) {
18 constexpr u8 alpha_threshold = 128;
19 constexpr u32 bytes_per_px = 4;
20 const u32 plane_dim = width * height;
21
22 Common::ThreadWorker& workers{GetThreadWorkers()};
23
24 for (u32 z = 0; z < depth; z++) {
25 for (u32 y = 0; y < height; y += 4) {
26 auto compress_row = [z, y, width, height, plane_dim, f, data, output]() {
27 for (u32 x = 0; x < width; x += 4) {
28 // Gather 4x4 block of RGBA texels
29 u8 input_colors[4][4][4];
30 bool any_alpha = false;
31
32 for (u32 j = 0; j < 4; j++) {
33 for (u32 i = 0; i < 4; i++) {
34 const size_t coord =
35 (z * plane_dim + (y + j) * width + (x + i)) * bytes_per_px;
36
37 if ((x + i < width) && (y + j < height)) {
38 if constexpr (ThresholdAlpha) {
39 if (data[coord + 3] >= alpha_threshold) {
40 input_colors[j][i][0] = data[coord + 0];
41 input_colors[j][i][1] = data[coord + 1];
42 input_colors[j][i][2] = data[coord + 2];
43 input_colors[j][i][3] = 255;
44 } else {
45 any_alpha = true;
46 memset(input_colors[j][i], 0, bytes_per_px);
47 }
48 } else {
49 memcpy(input_colors[j][i], &data[coord], bytes_per_px);
50 }
51 } else {
52 memset(input_colors[j][i], 0, bytes_per_px);
53 }
54 }
55 }
56
57 const u32 bytes_per_row = BytesPerBlock * Common::DivideUp(width, 4U);
58 const u32 bytes_per_plane = bytes_per_row * Common::DivideUp(height, 4U);
59 f(output.data() + z * bytes_per_plane + (y / 4) * bytes_per_row +
60 (x / 4) * BytesPerBlock,
61 reinterpret_cast<u8*>(input_colors), any_alpha);
62 }
63 };
64 workers.QueueWork(std::move(compress_row));
65 }
66 workers.WaitForRequests();
67 }
68}
69
70void CompressBC1(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
71 std::span<uint8_t> output) {
72 CompressBCN<8, true>(data, width, height, depth, output,
73 [](u8* block_output, const u8* block_input, bool any_alpha) {
74 stb_compress_bc1_block(block_output, block_input, any_alpha,
75 STB_DXT_NORMAL);
76 });
77}
78
79void CompressBC3(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
80 std::span<uint8_t> output) {
81 CompressBCN<16, false>(data, width, height, depth, output,
82 [](u8* block_output, const u8* block_input, bool any_alpha) {
83 stb_compress_bc3_block(block_output, block_input, STB_DXT_NORMAL);
84 });
85}
86
87} // namespace Tegra::Texture::BCN
diff --git a/src/video_core/textures/bcn.h b/src/video_core/textures/bcn.h
new file mode 100644
index 000000000..6464af885
--- /dev/null
+++ b/src/video_core/textures/bcn.h
@@ -0,0 +1,17 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <span>
7#include <stdint.h>
8
9namespace Tegra::Texture::BCN {
10
/// Compresses 4-byte-per-texel RGBA data into BC1 blocks (8 bytes per 4x4 block).
/// Alpha is thresholded: texels with alpha >= 128 are made opaque, the rest are zeroed.
/// @param data   Source texels, `depth` planes of `width * height` texels each.
/// @param width  Plane width in texels.
/// @param height Plane height in texels.
/// @param depth  Number of planes.
/// @param output Receives the encoded blocks; edge blocks are padded to a full 4x4 block.
11void CompressBC1(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
12 std::span<uint8_t> output);
13
/// Compresses 4-byte-per-texel RGBA data into BC3 blocks (16 bytes per 4x4 block).
/// Source alpha is handed to the encoder unmodified.
/// @param data   Source texels, `depth` planes of `width * height` texels each.
/// @param width  Plane width in texels.
/// @param height Plane height in texels.
/// @param depth  Number of planes.
/// @param output Receives the encoded blocks; edge blocks are padded to a full 4x4 block.
14void CompressBC3(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
15 std::span<uint8_t> output);
16
17} // namespace Tegra::Texture::BCN
diff --git a/src/video_core/textures/workers.cpp b/src/video_core/textures/workers.cpp
new file mode 100644
index 000000000..a71c305f4
--- /dev/null
+++ b/src/video_core/textures/workers.cpp
@@ -0,0 +1,15 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include "video_core/textures/workers.h"
5
6namespace Tegra::Texture {
7
8Common::ThreadWorker& GetThreadWorkers() {
9 static Common::ThreadWorker workers{std::max(std::thread::hardware_concurrency(), 2U) / 2,
10 "ImageTranscode"};
11
12 return workers;
13}
14
15} // namespace Tegra::Texture
diff --git a/src/video_core/textures/workers.h b/src/video_core/textures/workers.h
new file mode 100644
index 000000000..008dd05b3
--- /dev/null
+++ b/src/video_core/textures/workers.h
@@ -0,0 +1,12 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include "common/thread_worker.h"
7
8namespace Tegra::Texture {
9
/// Returns the thread worker pool used for texture transcoding work.
/// Every call returns a reference to the same pool instance.
10Common::ThreadWorker& GetThreadWorkers();
11
12}
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index f6e6f2736..b49f78bc9 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -1001,6 +1001,11 @@ u64 Device::GetDeviceMemoryUsage() const {
1001} 1001}
1002 1002
1003void Device::CollectPhysicalMemoryInfo() { 1003void Device::CollectPhysicalMemoryInfo() {
1004 // Account for resolution scaling in memory limits
1005 const size_t normal_memory = 6_GiB;
1006 const size_t scaler_memory = 1_GiB * Settings::values.resolution_info.ScaleUp(1);
1007
1008 // Calculate limits using memory budget
1004 VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{}; 1009 VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{};
1005 budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT; 1010 budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
1006 const auto mem_info = 1011 const auto mem_info =
@@ -1030,6 +1035,7 @@ void Device::CollectPhysicalMemoryInfo() {
1030 if (!is_integrated) { 1035 if (!is_integrated) {
1031 const u64 reserve_memory = std::min<u64>(device_access_memory / 8, 1_GiB); 1036 const u64 reserve_memory = std::min<u64>(device_access_memory / 8, 1_GiB);
1032 device_access_memory -= reserve_memory; 1037 device_access_memory -= reserve_memory;
1038 device_access_memory = std::min<u64>(device_access_memory, normal_memory + scaler_memory);
1033 return; 1039 return;
1034 } 1040 }
1035 const s64 available_memory = static_cast<s64>(device_access_memory - device_initial_usage); 1041 const s64 available_memory = static_cast<s64>(device_access_memory - device_initial_usage);
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 70737c54e..662651196 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -711,6 +711,7 @@ void Config::ReadRendererValues() {
711 ReadGlobalSetting(Settings::values.nvdec_emulation); 711 ReadGlobalSetting(Settings::values.nvdec_emulation);
712 ReadGlobalSetting(Settings::values.accelerate_astc); 712 ReadGlobalSetting(Settings::values.accelerate_astc);
713 ReadGlobalSetting(Settings::values.async_astc); 713 ReadGlobalSetting(Settings::values.async_astc);
714 ReadGlobalSetting(Settings::values.astc_recompression);
714 ReadGlobalSetting(Settings::values.use_reactive_flushing); 715 ReadGlobalSetting(Settings::values.use_reactive_flushing);
715 ReadGlobalSetting(Settings::values.shader_backend); 716 ReadGlobalSetting(Settings::values.shader_backend);
716 ReadGlobalSetting(Settings::values.use_asynchronous_shaders); 717 ReadGlobalSetting(Settings::values.use_asynchronous_shaders);
@@ -1359,6 +1360,10 @@ void Config::SaveRendererValues() {
1359 Settings::values.nvdec_emulation.UsingGlobal()); 1360 Settings::values.nvdec_emulation.UsingGlobal());
1360 WriteGlobalSetting(Settings::values.accelerate_astc); 1361 WriteGlobalSetting(Settings::values.accelerate_astc);
1361 WriteGlobalSetting(Settings::values.async_astc); 1362 WriteGlobalSetting(Settings::values.async_astc);
1363 WriteSetting(QString::fromStdString(Settings::values.astc_recompression.GetLabel()),
1364 static_cast<u32>(Settings::values.astc_recompression.GetValue(global)),
1365 static_cast<u32>(Settings::values.astc_recompression.GetDefault()),
1366 Settings::values.astc_recompression.UsingGlobal());
1362 WriteGlobalSetting(Settings::values.use_reactive_flushing); 1367 WriteGlobalSetting(Settings::values.use_reactive_flushing);
1363 WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()), 1368 WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()),
1364 static_cast<u32>(Settings::values.shader_backend.GetValue(global)), 1369 static_cast<u32>(Settings::values.shader_backend.GetValue(global)),
diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h
index 7d26e9ab6..9cb9db6cf 100644
--- a/src/yuzu/configuration/config.h
+++ b/src/yuzu/configuration/config.h
@@ -208,3 +208,4 @@ Q_DECLARE_METATYPE(Settings::ScalingFilter);
208Q_DECLARE_METATYPE(Settings::AntiAliasing); 208Q_DECLARE_METATYPE(Settings::AntiAliasing);
209Q_DECLARE_METATYPE(Settings::RendererBackend); 209Q_DECLARE_METATYPE(Settings::RendererBackend);
210Q_DECLARE_METATYPE(Settings::ShaderBackend); 210Q_DECLARE_METATYPE(Settings::ShaderBackend);
211Q_DECLARE_METATYPE(Settings::AstcRecompression);
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index 1f3e489d0..896863f87 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -27,6 +27,7 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
27 ui->async_present->setEnabled(runtime_lock); 27 ui->async_present->setEnabled(runtime_lock);
28 ui->renderer_force_max_clock->setEnabled(runtime_lock); 28 ui->renderer_force_max_clock->setEnabled(runtime_lock);
29 ui->async_astc->setEnabled(runtime_lock); 29 ui->async_astc->setEnabled(runtime_lock);
30 ui->astc_recompression_combobox->setEnabled(runtime_lock);
30 ui->use_asynchronous_shaders->setEnabled(runtime_lock); 31 ui->use_asynchronous_shaders->setEnabled(runtime_lock);
31 ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); 32 ui->anisotropic_filtering_combobox->setEnabled(runtime_lock);
32 ui->enable_compute_pipelines_checkbox->setEnabled(runtime_lock); 33 ui->enable_compute_pipelines_checkbox->setEnabled(runtime_lock);
@@ -47,14 +48,20 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
47 static_cast<int>(Settings::values.gpu_accuracy.GetValue())); 48 static_cast<int>(Settings::values.gpu_accuracy.GetValue()));
48 ui->anisotropic_filtering_combobox->setCurrentIndex( 49 ui->anisotropic_filtering_combobox->setCurrentIndex(
49 Settings::values.max_anisotropy.GetValue()); 50 Settings::values.max_anisotropy.GetValue());
51 ui->astc_recompression_combobox->setCurrentIndex(
52 static_cast<int>(Settings::values.astc_recompression.GetValue()));
50 } else { 53 } else {
51 ConfigurationShared::SetPerGameSetting(ui->gpu_accuracy, &Settings::values.gpu_accuracy); 54 ConfigurationShared::SetPerGameSetting(ui->gpu_accuracy, &Settings::values.gpu_accuracy);
52 ConfigurationShared::SetPerGameSetting(ui->anisotropic_filtering_combobox, 55 ConfigurationShared::SetPerGameSetting(ui->anisotropic_filtering_combobox,
53 &Settings::values.max_anisotropy); 56 &Settings::values.max_anisotropy);
57 ConfigurationShared::SetPerGameSetting(ui->astc_recompression_combobox,
58 &Settings::values.astc_recompression);
54 ConfigurationShared::SetHighlight(ui->label_gpu_accuracy, 59 ConfigurationShared::SetHighlight(ui->label_gpu_accuracy,
55 !Settings::values.gpu_accuracy.UsingGlobal()); 60 !Settings::values.gpu_accuracy.UsingGlobal());
56 ConfigurationShared::SetHighlight(ui->af_label, 61 ConfigurationShared::SetHighlight(ui->af_label,
57 !Settings::values.max_anisotropy.UsingGlobal()); 62 !Settings::values.max_anisotropy.UsingGlobal());
63 ConfigurationShared::SetHighlight(ui->label_astc_recompression,
64 !Settings::values.astc_recompression.UsingGlobal());
58 } 65 }
59} 66}
60 67
@@ -71,6 +78,8 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
71 ui->use_reactive_flushing, use_reactive_flushing); 78 ui->use_reactive_flushing, use_reactive_flushing);
72 ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_astc, ui->async_astc, 79 ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_astc, ui->async_astc,
73 async_astc); 80 async_astc);
81 ConfigurationShared::ApplyPerGameSetting(&Settings::values.astc_recompression,
82 ui->astc_recompression_combobox);
74 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, 83 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders,
75 ui->use_asynchronous_shaders, 84 ui->use_asynchronous_shaders,
76 use_asynchronous_shaders); 85 use_asynchronous_shaders);
@@ -105,6 +114,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
105 Settings::values.renderer_force_max_clock.UsingGlobal()); 114 Settings::values.renderer_force_max_clock.UsingGlobal());
106 ui->use_reactive_flushing->setEnabled(Settings::values.use_reactive_flushing.UsingGlobal()); 115 ui->use_reactive_flushing->setEnabled(Settings::values.use_reactive_flushing.UsingGlobal());
107 ui->async_astc->setEnabled(Settings::values.async_astc.UsingGlobal()); 116 ui->async_astc->setEnabled(Settings::values.async_astc.UsingGlobal());
117 ui->astc_recompression_combobox->setEnabled(
118 Settings::values.astc_recompression.UsingGlobal());
108 ui->use_asynchronous_shaders->setEnabled( 119 ui->use_asynchronous_shaders->setEnabled(
109 Settings::values.use_asynchronous_shaders.UsingGlobal()); 120 Settings::values.use_asynchronous_shaders.UsingGlobal());
110 ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); 121 ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal());
@@ -144,6 +155,9 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
144 ConfigurationShared::SetColoredComboBox( 155 ConfigurationShared::SetColoredComboBox(
145 ui->anisotropic_filtering_combobox, ui->af_label, 156 ui->anisotropic_filtering_combobox, ui->af_label,
146 static_cast<int>(Settings::values.max_anisotropy.GetValue(true))); 157 static_cast<int>(Settings::values.max_anisotropy.GetValue(true)));
158 ConfigurationShared::SetColoredComboBox(
159 ui->astc_recompression_combobox, ui->label_astc_recompression,
160 static_cast<int>(Settings::values.astc_recompression.GetValue(true)));
147} 161}
148 162
149void ConfigureGraphicsAdvanced::ExposeComputeOption() { 163void ConfigureGraphicsAdvanced::ExposeComputeOption() {
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index 9ef7c8e8f..37757a918 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -70,6 +70,50 @@
70 </widget> 70 </widget>
71 </item> 71 </item>
72 <item> 72 <item>
73 <widget class="QWidget" name="astc_recompression_layout" native="true">
74 <layout class="QHBoxLayout" name="horizontalLayout_3">
75 <property name="leftMargin">
76 <number>0</number>
77 </property>
78 <property name="topMargin">
79 <number>0</number>
80 </property>
81 <property name="rightMargin">
82 <number>0</number>
83 </property>
84 <property name="bottomMargin">
85 <number>0</number>
86 </property>
87 <item>
88 <widget class="QLabel" name="label_astc_recompression">
89 <property name="text">
90 <string>ASTC recompression:</string>
91 </property>
92 </widget>
93 </item>
94 <item>
95 <widget class="QComboBox" name="astc_recompression_combobox">
96 <item>
97 <property name="text">
98 <string>Uncompressed (Best quality)</string>
99 </property>
100 </item>
101 <item>
102 <property name="text">
103 <string>BC1 (Low quality)</string>
104 </property>
105 </item>
106 <item>
107 <property name="text">
108 <string>BC3 (Medium quality)</string>
109 </property>
110 </item>
111 </widget>
112 </item>
113 </layout>
114 </widget>
115 </item>
116 <item>
73 <widget class="QCheckBox" name="async_present"> 117 <widget class="QCheckBox" name="async_present">
74 <property name="text"> 118 <property name="text">
75 <string>Enable asynchronous presentation (Vulkan only)</string> 119 <string>Enable asynchronous presentation (Vulkan only)</string>
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index dc9a3d68f..c5bc472ca 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -318,6 +318,7 @@ void Config::ReadValues() {
318 ReadSetting("Renderer", Settings::values.nvdec_emulation); 318 ReadSetting("Renderer", Settings::values.nvdec_emulation);
319 ReadSetting("Renderer", Settings::values.accelerate_astc); 319 ReadSetting("Renderer", Settings::values.accelerate_astc);
320 ReadSetting("Renderer", Settings::values.async_astc); 320 ReadSetting("Renderer", Settings::values.async_astc);
321 ReadSetting("Renderer", Settings::values.astc_recompression);
321 ReadSetting("Renderer", Settings::values.use_fast_gpu_time); 322 ReadSetting("Renderer", Settings::values.use_fast_gpu_time);
322 ReadSetting("Renderer", Settings::values.use_vulkan_driver_pipeline_cache); 323 ReadSetting("Renderer", Settings::values.use_vulkan_driver_pipeline_cache);
323 324
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 5e7c3ac04..644a30e59 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -360,6 +360,10 @@ accelerate_astc =
360# 0 (default): Off, 1: On 360# 0 (default): Off, 1: On
361async_astc = 361async_astc =
362 362
363# Recompress ASTC textures to a different format.
364# 0 (default): Uncompressed, 1: BC1 (Low quality), 2: BC3 (Medium quality)
365astc_recompression =
366
363# Turns on the speed limiter, which will limit the emulation speed to the desired speed limit value 367# Turns on the speed limiter, which will limit the emulation speed to the desired speed limit value
364# 0: Off, 1: On (default) 368# 0: Off, 1: On (default)
365use_speed_limit = 369use_speed_limit =