summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--externals/CMakeLists.txt4
-rw-r--r--externals/stb/stb_dxt.cpp765
-rw-r--r--externals/stb/stb_dxt.h36
-rw-r--r--src/audio_core/renderer/adsp/audio_renderer.cpp5
-rw-r--r--src/audio_core/renderer/system_manager.cpp9
-rw-r--r--src/audio_core/renderer/system_manager.h10
-rw-r--r--src/audio_core/sink/sink_stream.cpp4
-rw-r--r--src/common/settings.cpp2
-rw-r--r--src/common/settings.h9
-rw-r--r--src/core/hid/emulated_controller.cpp9
-rw-r--r--src/core/hle/kernel/k_memory_block_manager.h8
-rw-r--r--src/core/hle/service/nfc/common/amiibo_crypto.cpp3
-rw-r--r--src/core/hle/service/nfc/common/device.cpp75
-rw-r--r--src/core/hle/service/nfc/common/device.h3
-rw-r--r--src/input_common/drivers/joycon.cpp8
-rw-r--r--src/input_common/helpers/joycon_driver.cpp20
-rw-r--r--src/input_common/helpers/joycon_driver.h1
-rw-r--r--src/input_common/helpers/joycon_protocol/joycon_types.h50
-rw-r--r--src/input_common/helpers/joycon_protocol/nfc.cpp332
-rw-r--r--src/input_common/helpers/joycon_protocol/nfc.h27
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp2
-rw-r--r--src/video_core/CMakeLists.txt6
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h6
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp31
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h4
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp24
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp3
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.cpp46
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.h15
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp12
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h2
-rw-r--r--src/video_core/texture_cache/image_base.cpp7
-rw-r--r--src/video_core/texture_cache/image_base.h2
-rw-r--r--src/video_core/texture_cache/texture_cache.h194
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h20
-rw-r--r--src/video_core/texture_cache/util.cpp92
-rw-r--r--src/video_core/textures/astc.cpp5
-rw-r--r--src/video_core/textures/bcn.cpp87
-rw-r--r--src/video_core/textures/bcn.h17
-rw-r--r--src/video_core/textures/workers.cpp15
-rw-r--r--src/video_core/textures/workers.h12
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp8
-rw-r--r--src/yuzu/configuration/config.cpp5
-rw-r--r--src/yuzu/configuration/config.h1
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.cpp14
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.ui44
-rw-r--r--src/yuzu_cmd/config.cpp1
-rw-r--r--src/yuzu_cmd/default_ini.h4
-rw-r--r--src/yuzu_cmd/yuzu.cpp4
49 files changed, 1842 insertions, 221 deletions
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index f2a560f04..e59eeb489 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -139,3 +139,7 @@ if (NOT TARGET LLVM::Demangle)
139 target_sources(demangle PRIVATE demangle/ItaniumDemangle.cpp) 139 target_sources(demangle PRIVATE demangle/ItaniumDemangle.cpp)
140 add_library(LLVM::Demangle ALIAS demangle) 140 add_library(LLVM::Demangle ALIAS demangle)
141endif() 141endif()
142
143add_library(stb STATIC)
144target_include_directories(stb PUBLIC ./stb)
145target_sources(stb PRIVATE stb/stb_dxt.cpp)
diff --git a/externals/stb/stb_dxt.cpp b/externals/stb/stb_dxt.cpp
new file mode 100644
index 000000000..64f1f3d03
--- /dev/null
+++ b/externals/stb/stb_dxt.cpp
@@ -0,0 +1,765 @@
1// SPDX-FileCopyrightText: fabian "ryg" giesen
2// SPDX-License-Identifier: MIT
3
4// stb_dxt.h - v1.12 - DXT1/DXT5 compressor
5
6#include <stb_dxt.h>
7
8#include <stdlib.h>
9#include <string.h>
10
11#if !defined(STBD_FABS)
12#include <math.h>
13#endif
14
15#ifndef STBD_FABS
16#define STBD_FABS(x) fabs(x)
17#endif
18
// Single-color endpoint lookup for 5-bit channels (red and blue), indexed by an 8-bit channel
// value. In stb__RefineBlock and stb__CompressColorBlock, entry [v][0] is packed into the max16
// endpoint and entry [v][1] into the min16 endpoint when a whole block is encoded as one color.
19static const unsigned char stb__OMatch5[256][2] = {
20 {0, 0}, {0, 0}, {0, 1}, {0, 1}, {1, 0}, {1, 0}, {1, 0}, {1, 1}, {1, 1},
21 {1, 1}, {1, 2}, {0, 4}, {2, 1}, {2, 1}, {2, 1}, {2, 2}, {2, 2}, {2, 2},
22 {2, 3}, {1, 5}, {3, 2}, {3, 2}, {4, 0}, {3, 3}, {3, 3}, {3, 3}, {3, 4},
23 {3, 4}, {3, 4}, {3, 5}, {4, 3}, {4, 3}, {5, 2}, {4, 4}, {4, 4}, {4, 5},
24 {4, 5}, {5, 4}, {5, 4}, {5, 4}, {6, 3}, {5, 5}, {5, 5}, {5, 6}, {4, 8},
25 {6, 5}, {6, 5}, {6, 5}, {6, 6}, {6, 6}, {6, 6}, {6, 7}, {5, 9}, {7, 6},
26 {7, 6}, {8, 4}, {7, 7}, {7, 7}, {7, 7}, {7, 8}, {7, 8}, {7, 8}, {7, 9},
27 {8, 7}, {8, 7}, {9, 6}, {8, 8}, {8, 8}, {8, 9}, {8, 9}, {9, 8}, {9, 8},
28 {9, 8}, {10, 7}, {9, 9}, {9, 9}, {9, 10}, {8, 12}, {10, 9}, {10, 9}, {10, 9},
29 {10, 10}, {10, 10}, {10, 10}, {10, 11}, {9, 13}, {11, 10}, {11, 10}, {12, 8}, {11, 11},
30 {11, 11}, {11, 11}, {11, 12}, {11, 12}, {11, 12}, {11, 13}, {12, 11}, {12, 11}, {13, 10},
31 {12, 12}, {12, 12}, {12, 13}, {12, 13}, {13, 12}, {13, 12}, {13, 12}, {14, 11}, {13, 13},
32 {13, 13}, {13, 14}, {12, 16}, {14, 13}, {14, 13}, {14, 13}, {14, 14}, {14, 14}, {14, 14},
33 {14, 15}, {13, 17}, {15, 14}, {15, 14}, {16, 12}, {15, 15}, {15, 15}, {15, 15}, {15, 16},
34 {15, 16}, {15, 16}, {15, 17}, {16, 15}, {16, 15}, {17, 14}, {16, 16}, {16, 16}, {16, 17},
35 {16, 17}, {17, 16}, {17, 16}, {17, 16}, {18, 15}, {17, 17}, {17, 17}, {17, 18}, {16, 20},
36 {18, 17}, {18, 17}, {18, 17}, {18, 18}, {18, 18}, {18, 18}, {18, 19}, {17, 21}, {19, 18},
37 {19, 18}, {20, 16}, {19, 19}, {19, 19}, {19, 19}, {19, 20}, {19, 20}, {19, 20}, {19, 21},
38 {20, 19}, {20, 19}, {21, 18}, {20, 20}, {20, 20}, {20, 21}, {20, 21}, {21, 20}, {21, 20},
39 {21, 20}, {22, 19}, {21, 21}, {21, 21}, {21, 22}, {20, 24}, {22, 21}, {22, 21}, {22, 21},
40 {22, 22}, {22, 22}, {22, 22}, {22, 23}, {21, 25}, {23, 22}, {23, 22}, {24, 20}, {23, 23},
41 {23, 23}, {23, 23}, {23, 24}, {23, 24}, {23, 24}, {23, 25}, {24, 23}, {24, 23}, {25, 22},
42 {24, 24}, {24, 24}, {24, 25}, {24, 25}, {25, 24}, {25, 24}, {25, 24}, {26, 23}, {25, 25},
43 {25, 25}, {25, 26}, {24, 28}, {26, 25}, {26, 25}, {26, 25}, {26, 26}, {26, 26}, {26, 26},
44 {26, 27}, {25, 29}, {27, 26}, {27, 26}, {28, 24}, {27, 27}, {27, 27}, {27, 27}, {27, 28},
45 {27, 28}, {27, 28}, {27, 29}, {28, 27}, {28, 27}, {29, 26}, {28, 28}, {28, 28}, {28, 29},
46 {28, 29}, {29, 28}, {29, 28}, {29, 28}, {30, 27}, {29, 29}, {29, 29}, {29, 30}, {29, 30},
47 {30, 29}, {30, 29}, {30, 29}, {30, 30}, {30, 30}, {30, 30}, {30, 31}, {30, 31}, {31, 30},
48 {31, 30}, {31, 30}, {31, 31}, {31, 31},
49};
// Single-color endpoint lookup for the 6-bit green channel, indexed by an 8-bit channel value.
// In stb__RefineBlock and stb__CompressColorBlock, entry [v][0] is packed into the max16
// endpoint and entry [v][1] into the min16 endpoint when a whole block is encoded as one color.
50static const unsigned char stb__OMatch6[256][2] = {
51 {0, 0}, {0, 1}, {1, 0}, {1, 1}, {1, 1}, {1, 2}, {2, 1}, {2, 2}, {2, 2},
52 {2, 3}, {3, 2}, {3, 3}, {3, 3}, {3, 4}, {4, 3}, {4, 4}, {4, 4}, {4, 5},
53 {5, 4}, {5, 5}, {5, 5}, {5, 6}, {6, 5}, {6, 6}, {6, 6}, {6, 7}, {7, 6},
54 {7, 7}, {7, 7}, {7, 8}, {8, 7}, {8, 8}, {8, 8}, {8, 9}, {9, 8}, {9, 9},
55 {9, 9}, {9, 10}, {10, 9}, {10, 10}, {10, 10}, {10, 11}, {11, 10}, {8, 16}, {11, 11},
56 {11, 12}, {12, 11}, {9, 17}, {12, 12}, {12, 13}, {13, 12}, {11, 16}, {13, 13}, {13, 14},
57 {14, 13}, {12, 17}, {14, 14}, {14, 15}, {15, 14}, {14, 16}, {15, 15}, {15, 16}, {16, 14},
58 {16, 15}, {17, 14}, {16, 16}, {16, 17}, {17, 16}, {18, 15}, {17, 17}, {17, 18}, {18, 17},
59 {20, 14}, {18, 18}, {18, 19}, {19, 18}, {21, 15}, {19, 19}, {19, 20}, {20, 19}, {20, 20},
60 {20, 20}, {20, 21}, {21, 20}, {21, 21}, {21, 21}, {21, 22}, {22, 21}, {22, 22}, {22, 22},
61 {22, 23}, {23, 22}, {23, 23}, {23, 23}, {23, 24}, {24, 23}, {24, 24}, {24, 24}, {24, 25},
62 {25, 24}, {25, 25}, {25, 25}, {25, 26}, {26, 25}, {26, 26}, {26, 26}, {26, 27}, {27, 26},
63 {24, 32}, {27, 27}, {27, 28}, {28, 27}, {25, 33}, {28, 28}, {28, 29}, {29, 28}, {27, 32},
64 {29, 29}, {29, 30}, {30, 29}, {28, 33}, {30, 30}, {30, 31}, {31, 30}, {30, 32}, {31, 31},
65 {31, 32}, {32, 30}, {32, 31}, {33, 30}, {32, 32}, {32, 33}, {33, 32}, {34, 31}, {33, 33},
66 {33, 34}, {34, 33}, {36, 30}, {34, 34}, {34, 35}, {35, 34}, {37, 31}, {35, 35}, {35, 36},
67 {36, 35}, {36, 36}, {36, 36}, {36, 37}, {37, 36}, {37, 37}, {37, 37}, {37, 38}, {38, 37},
68 {38, 38}, {38, 38}, {38, 39}, {39, 38}, {39, 39}, {39, 39}, {39, 40}, {40, 39}, {40, 40},
69 {40, 40}, {40, 41}, {41, 40}, {41, 41}, {41, 41}, {41, 42}, {42, 41}, {42, 42}, {42, 42},
70 {42, 43}, {43, 42}, {40, 48}, {43, 43}, {43, 44}, {44, 43}, {41, 49}, {44, 44}, {44, 45},
71 {45, 44}, {43, 48}, {45, 45}, {45, 46}, {46, 45}, {44, 49}, {46, 46}, {46, 47}, {47, 46},
72 {46, 48}, {47, 47}, {47, 48}, {48, 46}, {48, 47}, {49, 46}, {48, 48}, {48, 49}, {49, 48},
73 {50, 47}, {49, 49}, {49, 50}, {50, 49}, {52, 46}, {50, 50}, {50, 51}, {51, 50}, {53, 47},
74 {51, 51}, {51, 52}, {52, 51}, {52, 52}, {52, 52}, {52, 53}, {53, 52}, {53, 53}, {53, 53},
75 {53, 54}, {54, 53}, {54, 54}, {54, 54}, {54, 55}, {55, 54}, {55, 55}, {55, 55}, {55, 56},
76 {56, 55}, {56, 56}, {56, 56}, {56, 57}, {57, 56}, {57, 57}, {57, 57}, {57, 58}, {58, 57},
77 {58, 58}, {58, 58}, {58, 59}, {59, 58}, {59, 59}, {59, 59}, {59, 60}, {60, 59}, {60, 60},
78 {60, 60}, {60, 61}, {61, 60}, {61, 61}, {61, 61}, {61, 62}, {62, 61}, {62, 62}, {62, 62},
79 {62, 63}, {63, 62}, {63, 63}, {63, 63},
80};
81
// Shift-only form of a rounded (a * b) / 255: bias the product by 128, then fold the high byte
// back in before dropping the low eight bits.
static int stb__Mul8Bit(int a, int b) {
    const int biased = a * b + 128;
    const int folded = biased + (biased >> 8);
    return folded >> 8;
}
86
// Unpacks a 16-bit 5:6:5 color into four bytes at `out`: red, green, blue, then a zero.
static void stb__From16Bit(unsigned char* out, unsigned short v) {
    const int red5 = (v >> 11) & 0x1f;
    const int green6 = (v >> 5) & 0x3f;
    const int blue5 = v & 0x1f;

    // Widen to 8 bits by replicating the top bits into the low bits.
    out[0] = static_cast<unsigned char>((red5 * 33) >> 2);
    out[1] = static_cast<unsigned char>((green6 * 65) >> 4);
    out[2] = static_cast<unsigned char>((blue5 * 33) >> 2);
    out[3] = 0;
}
98
99static unsigned short stb__As16Bit(int r, int g, int b) {
100 return (unsigned short)((stb__Mul8Bit(r, 31) << 11) + (stb__Mul8Bit(g, 63) << 5) +
101 stb__Mul8Bit(b, 31));
102}
103
// Interpolates one third of the way from a to b. The rounding behavior is selected at compile
// time through STB_DXT_USE_ROUNDING_BIAS.
static int stb__Lerp13(int a, int b) {
#ifdef STB_DXT_USE_ROUNDING_BIAS
    // Biased variant: add roughly a third of the difference (0x55 / 255).
    const int delta = b - a;
    return a + stb__Mul8Bit(delta, 0x55);
#else
    // Unbiased variant. Swap "/ 3" for "* 0xaaab) >> 17" if the division is ever too slow.
    const int weighted_sum = 2 * a + b;
    return weighted_sum / 3;
#endif
}
117
// Midpoint of a and b, using integer division.
static int stb__Lerp12(int a, int b) {
    const int sum = a + b;
    return sum / 2;
}
122
123// lerp RGB color
124static void stb__Lerp13RGB(unsigned char* out, unsigned char* p1, unsigned char* p2) {
125 out[0] = (unsigned char)stb__Lerp13(p1[0], p2[0]);
126 out[1] = (unsigned char)stb__Lerp13(p1[1], p2[1]);
127 out[2] = (unsigned char)stb__Lerp13(p1[2], p2[2]);
128}
129
130static void stb__Lerp12RGB(unsigned char* out, unsigned char* p1, unsigned char* p2) {
131 out[0] = (unsigned char)stb__Lerp12(p1[0], p2[0]);
132 out[1] = (unsigned char)stb__Lerp12(p1[1], p2[1]);
133 out[2] = (unsigned char)stb__Lerp12(p1[2], p2[2]);
134}
135
136/****************************************************************************/
137
138static void stb__Eval4Colors(unsigned char* color, unsigned short c0, unsigned short c1) {
139 stb__From16Bit(color + 0, c0);
140 stb__From16Bit(color + 4, c1);
141 stb__Lerp13RGB(color + 8, color + 0, color + 4);
142 stb__Lerp13RGB(color + 12, color + 4, color + 0);
143}
144
145static void stb__Eval3Colors(unsigned char* color, unsigned short c0, unsigned short c1) {
146 stb__From16Bit(color + 0, c0);
147 stb__From16Bit(color + 4, c1);
148 stb__Lerp12RGB(color + 8, color + 0, color + 4);
149}
150
// Assigns each of the 16 four-byte pixels in `block` one of the four palette entries in `color`
// (4 bytes per entry) and returns the packed 2-bit indices, pixel 0 in the lowest bits.
//
// Pixels and palette entries are projected onto the axis color[0] - color[1]. The index is then
// picked by comparing the doubled pixel projection against sums of two palette projections,
// i.e. against the crossover points between neighbouring palette entries without dividing.
// This 1D approximation is not always the closest match in euclidean terms, but it is very
// close and a lot faster.
// http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html
static unsigned int stb__MatchColorsBlock(unsigned char* block, unsigned char* color) {
    const int axis_r = color[0] - color[4];
    const int axis_g = color[1] - color[5];
    const int axis_b = color[2] - color[6];

    int palette_proj[4];
    for (int entry = 0; entry < 4; ++entry) {
        const unsigned char* c = color + entry * 4;
        palette_proj[entry] = c[0] * axis_r + c[1] * axis_g + c[2] * axis_b;
    }

    // Doubled crossover points between adjacent palette entries along the axis.
    const int c0Point = palette_proj[1] + palette_proj[3];
    const int halfPoint = palette_proj[3] + palette_proj[2];
    const int c3Point = palette_proj[2] + palette_proj[0];

    unsigned int mask = 0;
    for (int px = 15; px >= 0; --px) {
        const unsigned char* p = block + px * 4;
        const int dot = (p[0] * axis_r + p[1] * axis_g + p[2] * axis_b) * 2;

        unsigned int index;
        if (dot < halfPoint) {
            index = (dot < c0Point) ? 1u : 3u;
        } else {
            index = (dot < c3Point) ? 2u : 0u;
        }
        mask = (mask << 2) | index;
    }

    return mask;
}
193
// Index selection for the three-color-plus-transparent palette.
//
// `block` holds 16 RGBA pixels and `color` holds three palette entries of 4 bytes each:
// color[0], color[1] and their midpoint color[2]. Returns the packed 2-bit indices, pixel 0 in
// the lowest bits:
//   3 - pixel alpha byte is 0 (transparent)
//   0 - nearest to color[0]
//   1 - nearest to color[1]
//   2 - nearest to the midpoint
//
// Pixels are projected onto the axis color[0] - color[1]. Along that axis color[1] has the
// smallest projection and color[0] the largest, so the doubled projection is compared against
// the two doubled crossover points in ascending order.
static unsigned int stb__MatchColorsAlphaBlock(unsigned char* block, unsigned char* color) {
    unsigned int mask = 0;
    int dirr = color[0 * 4 + 0] - color[1 * 4 + 0];
    int dirg = color[0 * 4 + 1] - color[1 * 4 + 1];
    int dirb = color[0 * 4 + 2] - color[1 * 4 + 2];
    int dots[16];
    int stops[3];
    int i;
    int c0Point, c2Point;

    for (i = 0; i < 16; i++)
        dots[i] = block[i * 4 + 0] * dirr + block[i * 4 + 1] * dirg + block[i * 4 + 2] * dirb;

    for (i = 0; i < 3; i++)
        stops[i] = color[i * 4 + 0] * dirr + color[i * 4 + 1] * dirg + color[i * 4 + 2] * dirb;

    c0Point = (stops[1] + stops[2]); // crossover between color[1] and the midpoint
    c2Point = (stops[2] + stops[0]); // crossover between the midpoint and color[0]

    for (i = 15; i >= 0; i--) {
        int dot = dots[i] * 2;
        mask <<= 2;

        if (block[i * 4 + 3] == 0)
            mask |= 3;
        else if (dot < c0Point)
            mask |= 1;
        else if (dot < c2Point)
            mask |= 2;
        // otherwise index 0 (color[0]); nothing to OR in
    }

    return mask;
}
227
// Exchanges the two endpoints when *pmin16 is smaller than *pmax16, so that afterwards
// *pmax16 <= *pmin16.
static void stb__ReorderColors(unsigned short* pmax16, unsigned short* pmin16) {
    const unsigned short first = *pmax16;
    const unsigned short second = *pmin16;
    if (second >= first) {
        return;
    }
    *pmax16 = second;
    *pmin16 = first;
}
235
// Exchanges the two endpoints when *pmax16 is smaller than *pmin16, so that afterwards
// *pmax16 >= *pmin16. When a swap happens the low bit of every 2-bit index in *pmask is flipped
// (0 <-> 1, 2 <-> 3) so each pixel keeps referring to the same color.
static void stb__FinalizeColors(unsigned short* pmax16, unsigned short* pmin16,
                                unsigned int* pmask) {
    const unsigned short first = *pmax16;
    const unsigned short second = *pmin16;
    if (first >= second) {
        return;
    }
    *pmax16 = second;
    *pmin16 = first;
    *pmask ^= 0x55555555;
}
245
246// The color optimization function. (Clever code, part 1)
247static void stb__OptimizeColorsBlock(unsigned char* block, unsigned short* pmax16,
248 unsigned short* pmin16) {
249 int mind, maxd;
250 unsigned char *minp, *maxp;
251 double magn;
252 int v_r, v_g, v_b;
253 static const int nIterPower = 4;
254 float covf[6], vfr, vfg, vfb;
255
256 // determine color distribution
257 int cov[6];
258 int mu[3], min[3], max[3];
259 int ch, i, iter;
260
261 for (ch = 0; ch < 3; ch++) {
262 const unsigned char* bp = ((const unsigned char*)block) + ch;
263 int muv, minv, maxv;
264
265 muv = minv = maxv = bp[0];
266 for (i = 4; i < 64; i += 4) {
267 muv += bp[i];
268 if (bp[i] < minv)
269 minv = bp[i];
270 else if (bp[i] > maxv)
271 maxv = bp[i];
272 }
273
274 mu[ch] = (muv + 8) >> 4;
275 min[ch] = minv;
276 max[ch] = maxv;
277 }
278
279 // determine covariance matrix
280 for (i = 0; i < 6; i++)
281 cov[i] = 0;
282
283 for (i = 0; i < 16; i++) {
284 int r = block[i * 4 + 0] - mu[0];
285 int g = block[i * 4 + 1] - mu[1];
286 int b = block[i * 4 + 2] - mu[2];
287
288 cov[0] += r * r;
289 cov[1] += r * g;
290 cov[2] += r * b;
291 cov[3] += g * g;
292 cov[4] += g * b;
293 cov[5] += b * b;
294 }
295
296 // convert covariance matrix to float, find principal axis via power iter
297 for (i = 0; i < 6; i++)
298 covf[i] = static_cast<float>(cov[i]) / 255.0f;
299
300 vfr = (float)(max[0] - min[0]);
301 vfg = (float)(max[1] - min[1]);
302 vfb = (float)(max[2] - min[2]);
303
304 for (iter = 0; iter < nIterPower; iter++) {
305 float r = vfr * covf[0] + vfg * covf[1] + vfb * covf[2];
306 float g = vfr * covf[1] + vfg * covf[3] + vfb * covf[4];
307 float b = vfr * covf[2] + vfg * covf[4] + vfb * covf[5];
308
309 vfr = r;
310 vfg = g;
311 vfb = b;
312 }
313
314 magn = STBD_FABS(vfr);
315 if (STBD_FABS(vfg) > magn)
316 magn = STBD_FABS(vfg);
317 if (STBD_FABS(vfb) > magn)
318 magn = STBD_FABS(vfb);
319
320 if (magn < 4.0f) { // too small, default to luminance
321 v_r = 299; // JPEG YCbCr luma coefs, scaled by 1000.
322 v_g = 587;
323 v_b = 114;
324 } else {
325 magn = 512.0 / magn;
326 v_r = (int)(vfr * magn);
327 v_g = (int)(vfg * magn);
328 v_b = (int)(vfb * magn);
329 }
330
331 minp = maxp = block;
332 mind = maxd = block[0] * v_r + block[1] * v_g + block[2] * v_b;
333 // Pick colors at extreme points
334 for (i = 1; i < 16; i++) {
335 int dot = block[i * 4 + 0] * v_r + block[i * 4 + 1] * v_g + block[i * 4 + 2] * v_b;
336
337 if (dot < mind) {
338 mind = dot;
339 minp = block + i * 4;
340 }
341
342 if (dot > maxd) {
343 maxd = dot;
344 maxp = block + i * 4;
345 }
346 }
347
348 *pmax16 = stb__As16Bit(maxp[0], maxp[1], maxp[2]);
349 *pmin16 = stb__As16Bit(minp[0], minp[1], minp[2]);
350 stb__ReorderColors(pmax16, pmin16);
351}
352
353static void stb__OptimizeColorsAlphaBlock(unsigned char* block, unsigned short* pmax16,
354 unsigned short* pmin16) {
355 int mind, maxd;
356 unsigned char *minp, *maxp;
357 double magn;
358 int v_r, v_g, v_b;
359 static const int nIterPower = 4;
360 float covf[6], vfr, vfg, vfb;
361
362 // determine color distribution
363 int cov[6];
364 int mu[3], min[3], max[3];
365 int ch, i, iter;
366
367 for (ch = 0; ch < 3; ch++) {
368 const unsigned char* bp = ((const unsigned char*)block) + ch;
369 int muv = 0, minv = 256, maxv = -1;
370 int num = 0;
371
372 for (i = 0; i < 64; i += 4) {
373 if (bp[3 - ch] == 0) {
374 continue;
375 }
376
377 muv += bp[i];
378 if (bp[i] < minv)
379 minv = bp[i];
380 else if (bp[i] > maxv)
381 maxv = bp[i];
382
383 num++;
384 }
385
386 mu[ch] = num > 0 ? (muv + 8) / num : 0;
387 min[ch] = minv;
388 max[ch] = maxv;
389 }
390
391 // determine covariance matrix
392 for (i = 0; i < 6; i++)
393 cov[i] = 0;
394
395 for (i = 0; i < 16; i++) {
396 if (block[i * 4 + 3] == 0) {
397 continue;
398 }
399
400 int r = block[i * 4 + 0] - mu[0];
401 int g = block[i * 4 + 1] - mu[1];
402 int b = block[i * 4 + 2] - mu[2];
403
404 cov[0] += r * r;
405 cov[1] += r * g;
406 cov[2] += r * b;
407 cov[3] += g * g;
408 cov[4] += g * b;
409 cov[5] += b * b;
410 }
411
412 // convert covariance matrix to float, find principal axis via power iter
413 for (i = 0; i < 6; i++)
414 covf[i] = static_cast<float>(cov[i]) / 255.0f;
415
416 vfr = (float)(max[0] - min[0]);
417 vfg = (float)(max[1] - min[1]);
418 vfb = (float)(max[2] - min[2]);
419
420 for (iter = 0; iter < nIterPower; iter++) {
421 float r = vfr * covf[0] + vfg * covf[1] + vfb * covf[2];
422 float g = vfr * covf[1] + vfg * covf[3] + vfb * covf[4];
423 float b = vfr * covf[2] + vfg * covf[4] + vfb * covf[5];
424
425 vfr = r;
426 vfg = g;
427 vfb = b;
428 }
429
430 magn = STBD_FABS(vfr);
431 if (STBD_FABS(vfg) > magn)
432 magn = STBD_FABS(vfg);
433 if (STBD_FABS(vfb) > magn)
434 magn = STBD_FABS(vfb);
435
436 if (magn < 4.0f) { // too small, default to luminance
437 v_r = 299; // JPEG YCbCr luma coefs, scaled by 1000.
438 v_g = 587;
439 v_b = 114;
440 } else {
441 magn = 512.0 / magn;
442 v_r = (int)(vfr * magn);
443 v_g = (int)(vfg * magn);
444 v_b = (int)(vfb * magn);
445 }
446
447 minp = maxp = NULL;
448 mind = 0x7fffffff;
449 maxd = -0x80000000;
450
451 // Pick colors at extreme points
452 for (i = 0; i < 16; i++) {
453 if (block[i * 4 + 3] == 0) {
454 continue;
455 }
456
457 int dot = block[i * 4 + 0] * v_r + block[i * 4 + 1] * v_g + block[i * 4 + 2] * v_b;
458
459 if (dot < mind) {
460 mind = dot;
461 minp = block + i * 4;
462 }
463
464 if (dot > maxd) {
465 maxd = dot;
466 maxp = block + i * 4;
467 }
468 }
469
470 if (!maxp) {
471 // all alpha, no color
472 *pmin16 = 0xffff;
473 *pmax16 = 0;
474 } else {
475 // endpoint colors found
476 *pmax16 = stb__As16Bit(maxp[0], maxp[1], maxp[2]);
477 *pmin16 = stb__As16Bit(minp[0], minp[1], minp[2]);
478
479 if (*pmax16 == *pmin16) {
480 // modify the endpoints to indicate presence of an alpha block
481 if (*pmax16 > 0) {
482 (*pmax16)--;
483 } else {
484 (*pmin16)++;
485 }
486 }
487
488 stb__ReorderColors(pmax16, pmin16);
489 }
490}
491
// Rounding thresholds used by stb__Quantize5: after truncating x * 31 to a level q, the level
// is bumped by one when x is greater than stb__midpoints5[q].
492static const float stb__midpoints5[32] = {
493 0.015686f, 0.047059f, 0.078431f, 0.111765f, 0.145098f, 0.176471f, 0.207843f, 0.241176f,
494 0.274510f, 0.305882f, 0.337255f, 0.370588f, 0.403922f, 0.435294f, 0.466667f, 0.5f,
495 0.533333f, 0.564706f, 0.596078f, 0.629412f, 0.662745f, 0.694118f, 0.725490f, 0.758824f,
496 0.792157f, 0.823529f, 0.854902f, 0.888235f, 0.921569f, 0.952941f, 0.984314f, 1.0f};
497
// Rounding thresholds used by stb__Quantize6: after truncating x * 63 to a level q, the level
// is bumped by one when x is greater than stb__midpoints6[q].
498static const float stb__midpoints6[64] = {
499 0.007843f, 0.023529f, 0.039216f, 0.054902f, 0.070588f, 0.086275f, 0.101961f, 0.117647f,
500 0.133333f, 0.149020f, 0.164706f, 0.180392f, 0.196078f, 0.211765f, 0.227451f, 0.245098f,
501 0.262745f, 0.278431f, 0.294118f, 0.309804f, 0.325490f, 0.341176f, 0.356863f, 0.372549f,
502 0.388235f, 0.403922f, 0.419608f, 0.435294f, 0.450980f, 0.466667f, 0.482353f, 0.500000f,
503 0.517647f, 0.533333f, 0.549020f, 0.564706f, 0.580392f, 0.596078f, 0.611765f, 0.627451f,
504 0.643137f, 0.658824f, 0.674510f, 0.690196f, 0.705882f, 0.721569f, 0.737255f, 0.754902f,
505 0.772549f, 0.788235f, 0.803922f, 0.819608f, 0.835294f, 0.850980f, 0.866667f, 0.882353f,
506 0.898039f, 0.913725f, 0.929412f, 0.945098f, 0.960784f, 0.976471f, 0.992157f, 1.0f};
507
508static unsigned short stb__Quantize5(float x) {
509 unsigned short q;
510 x = x < 0 ? 0 : x > 1 ? 1 : x; // saturate
511 q = (unsigned short)(x * 31);
512 q += (x > stb__midpoints5[q]);
513 return q;
514}
515
516static unsigned short stb__Quantize6(float x) {
517 unsigned short q;
518 x = x < 0 ? 0 : x > 1 ? 1 : x; // saturate
519 q = (unsigned short)(x * 63);
520 q += (x > stb__midpoints6[q]);
521 return q;
522}
523
524// The refinement function. (Clever code, part 2)
525// Tries to optimize colors to suit block contents better.
526// (By solving a least squares system via normal equations+Cramer's rule)
527static int stb__RefineBlock(unsigned char* block, unsigned short* pmax16, unsigned short* pmin16,
528 unsigned int mask) {
529 static const int w1Tab[4] = {3, 0, 2, 1};
530 static const int prods[4] = {0x090000, 0x000900, 0x040102, 0x010402};
531 // ^some magic to save a lot of multiplies in the accumulating loop...
532 // (precomputed products of weights for least squares system, accumulated
533 // inside one 32-bit register)
534
535 float f;
536 unsigned short oldMin, oldMax, min16, max16;
537 int i, akku = 0, xx, xy, yy;
538 int At1_r, At1_g, At1_b;
539 int At2_r, At2_g, At2_b;
540 unsigned int cm = mask;
541
542 oldMin = *pmin16;
543 oldMax = *pmax16;
544
545 if ((mask ^ (mask << 2)) < 4) // all pixels have the same index?
546 {
547 // yes, linear system would be singular; solve using optimal
548 // single-color match on average color
549 int r = 8, g = 8, b = 8;
550 for (i = 0; i < 16; ++i) {
551 r += block[i * 4 + 0];
552 g += block[i * 4 + 1];
553 b += block[i * 4 + 2];
554 }
555
556 r >>= 4;
557 g >>= 4;
558 b >>= 4;
559
560 max16 = static_cast<unsigned short>((stb__OMatch5[r][0] << 11) | (stb__OMatch6[g][0] << 5) |
561 stb__OMatch5[b][0]);
562 min16 = static_cast<unsigned short>((stb__OMatch5[r][1] << 11) | (stb__OMatch6[g][1] << 5) |
563 stb__OMatch5[b][1]);
564 } else {
565 At1_r = At1_g = At1_b = 0;
566 At2_r = At2_g = At2_b = 0;
567 for (i = 0; i < 16; ++i, cm >>= 2) {
568 int step = cm & 3;
569 int w1 = w1Tab[step];
570 int r = block[i * 4 + 0];
571 int g = block[i * 4 + 1];
572 int b = block[i * 4 + 2];
573
574 akku += prods[step];
575 At1_r += w1 * r;
576 At1_g += w1 * g;
577 At1_b += w1 * b;
578 At2_r += r;
579 At2_g += g;
580 At2_b += b;
581 }
582
583 At2_r = 3 * At2_r - At1_r;
584 At2_g = 3 * At2_g - At1_g;
585 At2_b = 3 * At2_b - At1_b;
586
587 // extract solutions and decide solvability
588 xx = akku >> 16;
589 yy = (akku >> 8) & 0xff;
590 xy = (akku >> 0) & 0xff;
591
592 f = 3.0f / 255.0f / static_cast<float>(xx * yy - xy * xy);
593
594 max16 = static_cast<unsigned short>(
595 stb__Quantize5(static_cast<float>(At1_r * yy - At2_r * xy) * f) << 11);
596 max16 |= static_cast<unsigned short>(
597 stb__Quantize6(static_cast<float>(At1_g * yy - At2_g * xy) * f) << 5);
598 max16 |= static_cast<unsigned short>(
599 stb__Quantize5(static_cast<float>(At1_b * yy - At2_b * xy) * f) << 0);
600
601 min16 = static_cast<unsigned short>(
602 stb__Quantize5(static_cast<float>(At2_r * xx - At1_r * xy) * f) << 11);
603 min16 |= static_cast<unsigned short>(
604 stb__Quantize6(static_cast<float>(At2_g * xx - At1_g * xy) * f) << 5);
605 min16 |= static_cast<unsigned short>(
606 stb__Quantize5(static_cast<float>(At2_b * xx - At1_b * xy) * f) << 0);
607 }
608
609 *pmin16 = min16;
610 *pmax16 = max16;
611 stb__ReorderColors(pmax16, pmin16);
612
613 return oldMin != min16 || oldMax != max16;
614}
615
616// Color block compression
617static void stb__CompressColorBlock(unsigned char* dest, unsigned char* block, int alpha,
618 int mode) {
619 unsigned int mask;
620 int i;
621 int refinecount;
622 unsigned short max16, min16;
623 unsigned char color[4 * 4];
624
625 refinecount = (mode & STB_DXT_HIGHQUAL) ? 2 : 1;
626
627 // check if block is constant
628 for (i = 1; i < 16; i++)
629 if (((unsigned int*)block)[i] != ((unsigned int*)block)[0])
630 break;
631
632 if (i == 16 && block[3] == 0 && alpha) { // constant alpha
633 mask = 0xffffffff;
634 max16 = 0;
635 min16 = 0xffff;
636 } else if (i == 16) { // constant color
637 int r = block[0], g = block[1], b = block[2];
638 mask = 0xaaaaaaaa;
639 max16 = static_cast<unsigned short>((stb__OMatch5[r][0] << 11) | (stb__OMatch6[g][0] << 5) |
640 stb__OMatch5[b][0]);
641 min16 = static_cast<unsigned short>((stb__OMatch5[r][1] << 11) | (stb__OMatch6[g][1] << 5) |
642 stb__OMatch5[b][1]);
643 } else if (alpha) {
644 stb__OptimizeColorsAlphaBlock(block, &max16, &min16);
645 stb__Eval3Colors(color, max16, min16);
646 mask = stb__MatchColorsAlphaBlock(block, color);
647 } else {
648 // first step: PCA+map along principal axis
649 stb__OptimizeColorsBlock(block, &max16, &min16);
650 if (max16 != min16) {
651 stb__Eval4Colors(color, max16, min16);
652 mask = stb__MatchColorsBlock(block, color);
653 } else
654 mask = 0;
655
656 // third step: refine (multiple times if requested)
657 for (i = 0; i < refinecount; i++) {
658 unsigned int lastmask = mask;
659
660 if (stb__RefineBlock(block, &max16, &min16, mask)) {
661 if (max16 != min16) {
662 stb__Eval4Colors(color, max16, min16);
663 mask = stb__MatchColorsBlock(block, color);
664 } else {
665 mask = 0;
666 break;
667 }
668 }
669
670 if (mask == lastmask)
671 break;
672 }
673 }
674
675 // write the color block
676 if (!alpha)
677 stb__FinalizeColors(&max16, &min16, &mask);
678
679 dest[0] = (unsigned char)(max16);
680 dest[1] = (unsigned char)(max16 >> 8);
681 dest[2] = (unsigned char)(min16);
682 dest[3] = (unsigned char)(min16 >> 8);
683 dest[4] = (unsigned char)(mask);
684 dest[5] = (unsigned char)(mask >> 8);
685 dest[6] = (unsigned char)(mask >> 16);
686 dest[7] = (unsigned char)(mask >> 24);
687}
688
// Alpha block compression (this is easy for a change).
//
// Reads 16 values from src, `stride` bytes apart, and writes 8 bytes to dest: the maximum, the
// minimum, then sixteen 3-bit indices packed low bits first.
static void stb__CompressAlphaBlock(unsigned char* dest, unsigned char* src, int stride) {
    // Value range of the block.
    int mn = src[0];
    int mx = src[0];
    for (int px = 1; px < 16; ++px) {
        const int value = src[px * stride];
        if (value < mn) {
            mn = value;
        } else if (value > mx) {
            mx = value;
        }
    }

    dest[0] = static_cast<unsigned char>(mx);
    dest[1] = static_cast<unsigned char>(mn);
    unsigned char* out = dest + 2;

    // Determine bias and emit indices. Given the choice of mx/mn, these indices are optimal:
    // http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/
    const int dist = mx - mn;
    const int dist4 = dist * 4;
    const int dist2 = dist * 2;
    int bias = (dist < 8) ? (dist - 1) : (dist / 2 + 2);
    bias -= mn * 7;

    int bits = 0;
    int mask = 0;
    for (int px = 0; px < 16; ++px) {
        int a = src[px * stride] * 7 + bias;

        // Linear position between 0 (value == min) and 7 (value == max), built bit by bit.
        int ind = 0;
        if (a >= dist4) {
            ind = 4;
            a -= dist4;
        }
        if (a >= dist2) {
            ind += 2;
            a -= dist2;
        }
        if (a >= dist) {
            ind += 1;
        }

        // Turn the linear position into the stored index (0/1 are the extremal points).
        ind = -ind & 7;
        ind ^= (2 > ind);

        // Append three bits and flush every completed byte.
        mask |= ind << bits;
        bits += 3;
        if (bits >= 8) {
            *out++ = static_cast<unsigned char>(mask);
            mask >>= 8;
            bits -= 8;
        }
    }
}
746
747void stb_compress_bc1_block(unsigned char* dest, const unsigned char* src, int alpha, int mode) {
748 stb__CompressColorBlock(dest, (unsigned char*)src, alpha, mode);
749}
750
751void stb_compress_bc3_block(unsigned char* dest, const unsigned char* src, int mode) {
752 unsigned char data[16][4];
753 int i;
754
755 stb__CompressAlphaBlock(dest, (unsigned char*)src + 3, 4);
756 dest += 8;
757 // make a new copy of the data in which alpha is opaque,
758 // because code uses a fast test for color constancy
759 memcpy(data, src, 4 * 16);
760 for (i = 0; i < 16; ++i)
761 data[i][3] = 255;
762 src = &data[0][0];
763
764 stb__CompressColorBlock(dest, (unsigned char*)src, 0, mode);
765}
diff --git a/externals/stb/stb_dxt.h b/externals/stb/stb_dxt.h
new file mode 100644
index 000000000..07d1d1de4
--- /dev/null
+++ b/externals/stb/stb_dxt.h
@@ -0,0 +1,36 @@
1// SPDX-FileCopyrightText: fabian "ryg" giesen
2// SPDX-License-Identifier: MIT
3
4// stb_dxt.h - v1.12 - DXT1/DXT5 compressor
5
6#ifndef STB_INCLUDE_STB_DXT_H
7#define STB_INCLUDE_STB_DXT_H
8
9#ifdef __cplusplus
10extern "C" {
11#endif
12
13#ifdef STB_DXT_STATIC
14#define STBDDEF static
15#else
16#define STBDDEF extern
17#endif
18
19// compression mode (bitflags)
20#define STB_DXT_NORMAL 0
21#define STB_DXT_DITHER 1 // use dithering. was always dubious, now deprecated. does nothing!
22#define STB_DXT_HIGHQUAL \
23 2 // high quality mode, does two refinement steps instead of 1. ~30-40% slower.
24
// Compresses one block of 16 pixels (4 bytes per pixel, 64 bytes read) into 8 bytes at dest.
// alpha: nonzero makes pixels whose alpha byte is 0 encode as index 3.
// mode:  STB_DXT_* bitflags above.
25STBDDEF void stb_compress_bc1_block(unsigned char* dest,
26 const unsigned char* src_rgba_four_bytes_per_pixel, int alpha,
27 int mode);
28
// Compresses one block of 16 pixels (4 bytes per pixel, 64 bytes read) into 16 bytes at dest:
// an 8-byte alpha block followed by an 8-byte color block. mode: STB_DXT_* bitflags above.
29STBDDEF void stb_compress_bc3_block(unsigned char* dest, const unsigned char* src, int mode);
30
31#define STB_COMPRESS_DXT_BLOCK
32
33#ifdef __cplusplus
34}
35#endif
36#endif // STB_INCLUDE_STB_DXT_H
diff --git a/src/audio_core/renderer/adsp/audio_renderer.cpp b/src/audio_core/renderer/adsp/audio_renderer.cpp
index 503f40349..1cbeed302 100644
--- a/src/audio_core/renderer/adsp/audio_renderer.cpp
+++ b/src/audio_core/renderer/adsp/audio_renderer.cpp
@@ -154,6 +154,11 @@ void AudioRenderer::ThreadFunc() {
154 return; 154 return;
155 155
156 case RenderMessage::AudioRenderer_Render: { 156 case RenderMessage::AudioRenderer_Render: {
157 if (system.IsShuttingDown()) [[unlikely]] {
158 std::this_thread::sleep_for(std::chrono::milliseconds(5));
159 mailbox->ADSPSendMessage(RenderMessage::AudioRenderer_RenderResponse);
160 continue;
161 }
157 std::array<bool, MaxRendererSessions> buffers_reset{}; 162 std::array<bool, MaxRendererSessions> buffers_reset{};
158 std::array<u64, MaxRendererSessions> render_times_taken{}; 163 std::array<u64, MaxRendererSessions> render_times_taken{};
159 const auto start_time{system.CoreTiming().GetClockTicks()}; 164 const auto start_time{system.CoreTiming().GetClockTicks()};
diff --git a/src/audio_core/renderer/system_manager.cpp b/src/audio_core/renderer/system_manager.cpp
index 07d8ed093..300ecdbf1 100644
--- a/src/audio_core/renderer/system_manager.cpp
+++ b/src/audio_core/renderer/system_manager.cpp
@@ -27,7 +27,7 @@ bool SystemManager::InitializeUnsafe() {
27 if (!active) { 27 if (!active) {
28 if (adsp.Start()) { 28 if (adsp.Start()) {
29 active = true; 29 active = true;
30 thread = std::jthread([this](std::stop_token stop_token) { ThreadFunc(); }); 30 thread = std::jthread([this](std::stop_token stop_token) { ThreadFunc(stop_token); });
31 } 31 }
32 } 32 }
33 33
@@ -39,8 +39,7 @@ void SystemManager::Stop() {
39 return; 39 return;
40 } 40 }
41 active = false; 41 active = false;
42 update.store(true); 42 thread.request_stop();
43 update.notify_all();
44 thread.join(); 43 thread.join();
45 adsp.Stop(); 44 adsp.Stop();
46} 45}
@@ -85,12 +84,12 @@ bool SystemManager::Remove(System& system_) {
85 return true; 84 return true;
86} 85}
87 86
88void SystemManager::ThreadFunc() { 87void SystemManager::ThreadFunc(std::stop_token stop_token) {
89 static constexpr char name[]{"AudioRenderSystemManager"}; 88 static constexpr char name[]{"AudioRenderSystemManager"};
90 MicroProfileOnThreadCreate(name); 89 MicroProfileOnThreadCreate(name);
91 Common::SetCurrentThreadName(name); 90 Common::SetCurrentThreadName(name);
92 Common::SetCurrentThreadPriority(Common::ThreadPriority::High); 91 Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
93 while (active) { 92 while (active && !stop_token.stop_requested()) {
94 { 93 {
95 std::scoped_lock l{mutex1}; 94 std::scoped_lock l{mutex1};
96 95
diff --git a/src/audio_core/renderer/system_manager.h b/src/audio_core/renderer/system_manager.h
index 1f0bbd8b4..9681fd121 100644
--- a/src/audio_core/renderer/system_manager.h
+++ b/src/audio_core/renderer/system_manager.h
@@ -66,13 +66,7 @@ private:
66 /** 66 /**
67 * Main thread responsible for command generation. 67 * Main thread responsible for command generation.
68 */ 68 */
69 void ThreadFunc(); 69 void ThreadFunc(std::stop_token stop_token);
70
71 enum class StreamState {
72 Filling,
73 Steady,
74 Draining,
75 };
76 70
77 /// Core system 71 /// Core system
78 Core::System& core; 72 Core::System& core;
@@ -90,8 +84,6 @@ private:
90 ADSP::ADSP& adsp; 84 ADSP::ADSP& adsp;
91 /// AudioRenderer mailbox for communication 85 /// AudioRenderer mailbox for communication
92 ADSP::AudioRenderer_Mailbox* mailbox{}; 86 ADSP::AudioRenderer_Mailbox* mailbox{};
93 /// Atomic for main thread to wait on
94 std::atomic<bool> update{};
95}; 87};
96 88
97} // namespace AudioCore::AudioRenderer 89} // namespace AudioCore::AudioRenderer
diff --git a/src/audio_core/sink/sink_stream.cpp b/src/audio_core/sink/sink_stream.cpp
index 13ba26e74..9bbb54162 100644
--- a/src/audio_core/sink/sink_stream.cpp
+++ b/src/audio_core/sink/sink_stream.cpp
@@ -271,8 +271,8 @@ u64 SinkStream::GetExpectedPlayedSampleCount() {
271 271
272void SinkStream::WaitFreeSpace() { 272void SinkStream::WaitFreeSpace() {
273 std::unique_lock lk{release_mutex}; 273 std::unique_lock lk{release_mutex};
274 release_cv.wait( 274 release_cv.wait_for(lk, std::chrono::milliseconds(5),
275 lk, [this]() { return queued_buffers < max_queue_size || system.IsShuttingDown(); }); 275 [this]() { return queued_buffers < max_queue_size; });
276} 276}
277 277
278} // namespace AudioCore::Sink 278} // namespace AudioCore::Sink
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index ba617aea1..ff53e80bb 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -61,6 +61,7 @@ void LogSettings() {
61 log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue()); 61 log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue());
62 log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue()); 62 log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue());
63 log_setting("Renderer_AsyncASTC", values.async_astc.GetValue()); 63 log_setting("Renderer_AsyncASTC", values.async_astc.GetValue());
64 log_setting("Renderer_AstcRecompression", values.astc_recompression.GetValue());
64 log_setting("Renderer_UseVsync", values.vsync_mode.GetValue()); 65 log_setting("Renderer_UseVsync", values.vsync_mode.GetValue());
65 log_setting("Renderer_UseReactiveFlushing", values.use_reactive_flushing.GetValue()); 66 log_setting("Renderer_UseReactiveFlushing", values.use_reactive_flushing.GetValue());
66 log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue()); 67 log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue());
@@ -224,6 +225,7 @@ void RestoreGlobalState(bool is_powered_on) {
224 values.nvdec_emulation.SetGlobal(true); 225 values.nvdec_emulation.SetGlobal(true);
225 values.accelerate_astc.SetGlobal(true); 226 values.accelerate_astc.SetGlobal(true);
226 values.async_astc.SetGlobal(true); 227 values.async_astc.SetGlobal(true);
228 values.astc_recompression.SetGlobal(true);
227 values.use_reactive_flushing.SetGlobal(true); 229 values.use_reactive_flushing.SetGlobal(true);
228 values.shader_backend.SetGlobal(true); 230 values.shader_backend.SetGlobal(true);
229 values.use_asynchronous_shaders.SetGlobal(true); 231 values.use_asynchronous_shaders.SetGlobal(true);
diff --git a/src/common/settings.h b/src/common/settings.h
index 36ffcd693..7f865b2a7 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -90,6 +90,12 @@ enum class AntiAliasing : u32 {
90 LastAA = Smaa, 90 LastAA = Smaa,
91}; 91};
92 92
93enum class AstcRecompression : u32 {
94 Uncompressed = 0,
95 Bc1 = 1,
96 Bc3 = 2,
97};
98
93struct ResolutionScalingInfo { 99struct ResolutionScalingInfo {
94 u32 up_scale{1}; 100 u32 up_scale{1};
95 u32 down_shift{0}; 101 u32 down_shift{0};
@@ -473,6 +479,9 @@ struct Values {
473 SwitchableSetting<bool> use_vulkan_driver_pipeline_cache{true, 479 SwitchableSetting<bool> use_vulkan_driver_pipeline_cache{true,
474 "use_vulkan_driver_pipeline_cache"}; 480 "use_vulkan_driver_pipeline_cache"};
475 SwitchableSetting<bool> enable_compute_pipelines{false, "enable_compute_pipelines"}; 481 SwitchableSetting<bool> enable_compute_pipelines{false, "enable_compute_pipelines"};
482 SwitchableSetting<AstcRecompression, true> astc_recompression{
483 AstcRecompression::Uncompressed, AstcRecompression::Uncompressed, AstcRecompression::Bc3,
484 "astc_recompression"};
476 485
477 SwitchableSetting<u8> bg_red{0, "bg_red"}; 486 SwitchableSetting<u8> bg_red{0, "bg_red"};
478 SwitchableSetting<u8> bg_green{0, "bg_green"}; 487 SwitchableSetting<u8> bg_green{0, "bg_green"};
diff --git a/src/core/hid/emulated_controller.cpp b/src/core/hid/emulated_controller.cpp
index 366880711..bbfea7117 100644
--- a/src/core/hid/emulated_controller.cpp
+++ b/src/core/hid/emulated_controller.cpp
@@ -1283,9 +1283,14 @@ bool EmulatedController::HasNfc() const {
1283} 1283}
1284 1284
1285bool EmulatedController::WriteNfc(const std::vector<u8>& data) { 1285bool EmulatedController::WriteNfc(const std::vector<u8>& data) {
1286 auto& nfc_output_device = output_devices[3]; 1286 auto& nfc_output_device = output_devices[static_cast<std::size_t>(DeviceIndex::Right)];
1287 auto& nfc_virtual_output_device = output_devices[3];
1288
1289 if (nfc_output_device->SupportsNfc() != Common::Input::NfcState::NotSupported) {
1290 return nfc_output_device->WriteNfcData(data) == Common::Input::NfcState::Success;
1291 }
1287 1292
1288 return nfc_output_device->WriteNfcData(data) == Common::Input::NfcState::Success; 1293 return nfc_virtual_output_device->WriteNfcData(data) == Common::Input::NfcState::Success;
1289} 1294}
1290 1295
1291void EmulatedController::SetLedPattern() { 1296void EmulatedController::SetLedPattern() {
diff --git a/src/core/hle/kernel/k_memory_block_manager.h b/src/core/hle/kernel/k_memory_block_manager.h
index 7c0bd16f0..96496e990 100644
--- a/src/core/hle/kernel/k_memory_block_manager.h
+++ b/src/core/hle/kernel/k_memory_block_manager.h
@@ -144,14 +144,10 @@ private:
144 144
145class KScopedMemoryBlockManagerAuditor { 145class KScopedMemoryBlockManagerAuditor {
146public: 146public:
147 explicit KScopedMemoryBlockManagerAuditor(KMemoryBlockManager* m) : m_manager(m) { 147 explicit KScopedMemoryBlockManagerAuditor(KMemoryBlockManager* m) : m_manager(m) {}
148 ASSERT(m_manager->CheckState());
149 }
150 explicit KScopedMemoryBlockManagerAuditor(KMemoryBlockManager& m) 148 explicit KScopedMemoryBlockManagerAuditor(KMemoryBlockManager& m)
151 : KScopedMemoryBlockManagerAuditor(std::addressof(m)) {} 149 : KScopedMemoryBlockManagerAuditor(std::addressof(m)) {}
152 ~KScopedMemoryBlockManagerAuditor() { 150 ~KScopedMemoryBlockManagerAuditor() = default;
153 ASSERT(m_manager->CheckState());
154 }
155 151
156private: 152private:
157 KMemoryBlockManager* m_manager; 153 KMemoryBlockManager* m_manager;
diff --git a/src/core/hle/service/nfc/common/amiibo_crypto.cpp b/src/core/hle/service/nfc/common/amiibo_crypto.cpp
index f3901ee8d..b2bcb68c3 100644
--- a/src/core/hle/service/nfc/common/amiibo_crypto.cpp
+++ b/src/core/hle/service/nfc/common/amiibo_crypto.cpp
@@ -52,9 +52,6 @@ bool IsAmiiboValid(const EncryptedNTAG215File& ntag_file) {
52 if (ntag_file.compability_container != 0xEEFF10F1U) { 52 if (ntag_file.compability_container != 0xEEFF10F1U) {
53 return false; 53 return false;
54 } 54 }
55 if (amiibo_data.constant_value != 0xA5) {
56 return false;
57 }
58 if (amiibo_data.model_info.tag_type != NFC::PackedTagType::Type2) { 55 if (amiibo_data.model_info.tag_type != NFC::PackedTagType::Type2) {
59 return false; 56 return false;
60 } 57 }
diff --git a/src/core/hle/service/nfc/common/device.cpp b/src/core/hle/service/nfc/common/device.cpp
index 322bde2ed..0bd7900e1 100644
--- a/src/core/hle/service/nfc/common/device.cpp
+++ b/src/core/hle/service/nfc/common/device.cpp
@@ -119,18 +119,31 @@ bool NfcDevice::LoadNfcTag(std::span<const u8> data) {
119 119
120 memcpy(&tag_data, data.data(), sizeof(NFP::EncryptedNTAG215File)); 120 memcpy(&tag_data, data.data(), sizeof(NFP::EncryptedNTAG215File));
121 is_plain_amiibo = NFP::AmiiboCrypto::IsAmiiboValid(tag_data); 121 is_plain_amiibo = NFP::AmiiboCrypto::IsAmiiboValid(tag_data);
122 is_write_protected = false;
122 123
124 device_state = DeviceState::TagFound;
125 deactivate_event->GetReadableEvent().Clear();
126 activate_event->Signal();
127
128 // Fallback for plain amiibos
123 if (is_plain_amiibo) { 129 if (is_plain_amiibo) {
124 encrypted_tag_data = NFP::AmiiboCrypto::EncodedDataToNfcData(tag_data);
125 LOG_INFO(Service_NFP, "Using plain amiibo"); 130 LOG_INFO(Service_NFP, "Using plain amiibo");
126 } else { 131 encrypted_tag_data = NFP::AmiiboCrypto::EncodedDataToNfcData(tag_data);
127 tag_data = {}; 132 return true;
133 }
134
135 // Fallback for encrypted amiibos without keys
136 if (!NFP::AmiiboCrypto::IsKeyAvailable()) {
137 LOG_INFO(Service_NFC, "Loading amiibo without keys");
128 memcpy(&encrypted_tag_data, data.data(), sizeof(NFP::EncryptedNTAG215File)); 138 memcpy(&encrypted_tag_data, data.data(), sizeof(NFP::EncryptedNTAG215File));
139 BuildAmiiboWithoutKeys();
140 is_plain_amiibo = true;
141 is_write_protected = true;
142 return true;
129 } 143 }
130 144
131 device_state = DeviceState::TagFound; 145 tag_data = {};
132 deactivate_event->GetReadableEvent().Clear(); 146 memcpy(&encrypted_tag_data, data.data(), sizeof(NFP::EncryptedNTAG215File));
133 activate_event->Signal();
134 return true; 147 return true;
135} 148}
136 149
@@ -346,23 +359,15 @@ Result NfcDevice::Mount(NFP::ModelType model_type, NFP::MountTarget mount_target
346 return ResultWrongDeviceState; 359 return ResultWrongDeviceState;
347 } 360 }
348 361
349 // The loaded amiibo is not encrypted
350 if (is_plain_amiibo) {
351 device_state = DeviceState::TagMounted;
352 mount_target = mount_target_;
353 return ResultSuccess;
354 }
355
356 if (!NFP::AmiiboCrypto::IsAmiiboValid(encrypted_tag_data)) { 362 if (!NFP::AmiiboCrypto::IsAmiiboValid(encrypted_tag_data)) {
357 LOG_ERROR(Service_NFP, "Not an amiibo"); 363 LOG_ERROR(Service_NFP, "Not an amiibo");
358 return ResultNotAnAmiibo; 364 return ResultNotAnAmiibo;
359 } 365 }
360 366
361 // Mark amiibos as read only when keys are missing 367 // The loaded amiibo is not encrypted
362 if (!NFP::AmiiboCrypto::IsKeyAvailable()) { 368 if (is_plain_amiibo) {
363 LOG_ERROR(Service_NFP, "No keys detected");
364 device_state = DeviceState::TagMounted; 369 device_state = DeviceState::TagMounted;
365 mount_target = NFP::MountTarget::Rom; 370 mount_target = mount_target_;
366 return ResultSuccess; 371 return ResultSuccess;
367 } 372 }
368 373
@@ -421,11 +426,11 @@ Result NfcDevice::Flush() {
421 426
422 tag_data.write_counter++; 427 tag_data.write_counter++;
423 428
424 FlushWithBreak(NFP::BreakType::Normal); 429 const auto result = FlushWithBreak(NFP::BreakType::Normal);
425 430
426 is_data_moddified = false; 431 is_data_moddified = false;
427 432
428 return ResultSuccess; 433 return result;
429} 434}
430 435
431Result NfcDevice::FlushDebug() { 436Result NfcDevice::FlushDebug() {
@@ -444,11 +449,11 @@ Result NfcDevice::FlushDebug() {
444 449
445 tag_data.write_counter++; 450 tag_data.write_counter++;
446 451
447 FlushWithBreak(NFP::BreakType::Normal); 452 const auto result = FlushWithBreak(NFP::BreakType::Normal);
448 453
449 is_data_moddified = false; 454 is_data_moddified = false;
450 455
451 return ResultSuccess; 456 return result;
452} 457}
453 458
454Result NfcDevice::FlushWithBreak(NFP::BreakType break_type) { 459Result NfcDevice::FlushWithBreak(NFP::BreakType break_type) {
@@ -457,6 +462,11 @@ Result NfcDevice::FlushWithBreak(NFP::BreakType break_type) {
457 return ResultWrongDeviceState; 462 return ResultWrongDeviceState;
458 } 463 }
459 464
465 if (is_write_protected) {
466 LOG_ERROR(Service_NFP, "No keys available skipping write request");
467 return ResultSuccess;
468 }
469
460 std::vector<u8> data(sizeof(NFP::EncryptedNTAG215File)); 470 std::vector<u8> data(sizeof(NFP::EncryptedNTAG215File));
461 if (is_plain_amiibo) { 471 if (is_plain_amiibo) {
462 memcpy(data.data(), &tag_data, sizeof(tag_data)); 472 memcpy(data.data(), &tag_data, sizeof(tag_data));
@@ -1033,7 +1043,6 @@ Result NfcDevice::GetAll(NFP::NfpData& data) const {
1033 } 1043 }
1034 1044
1035 NFP::CommonInfo common_info{}; 1045 NFP::CommonInfo common_info{};
1036 Service::Mii::MiiManager manager;
1037 const u64 application_id = tag_data.application_id; 1046 const u64 application_id = tag_data.application_id;
1038 1047
1039 GetCommonInfo(common_info); 1048 GetCommonInfo(common_info);
@@ -1249,6 +1258,28 @@ void NfcDevice::UpdateRegisterInfoCrc() {
1249 tag_data.register_info_crc = crc.checksum(); 1258 tag_data.register_info_crc = crc.checksum();
1250} 1259}
1251 1260
1261void NfcDevice::BuildAmiiboWithoutKeys() {
1262 Service::Mii::MiiManager manager;
1263 auto& settings = tag_data.settings;
1264
1265 tag_data = NFP::AmiiboCrypto::NfcDataToEncodedData(encrypted_tag_data);
1266
1267 // Common info
1268 tag_data.write_counter = 0;
1269 tag_data.amiibo_version = 0;
1270 settings.write_date = GetAmiiboDate(GetCurrentPosixTime());
1271
1272 // Register info
1273 SetAmiiboName(settings, {'y', 'u', 'z', 'u', 'A', 'm', 'i', 'i', 'b', 'o'});
1274 settings.settings.font_region.Assign(0);
1275 settings.init_date = GetAmiiboDate(GetCurrentPosixTime());
1276 tag_data.owner_mii = manager.BuildFromStoreData(manager.BuildDefault(0));
1277
1278 // Admin info
1279 settings.settings.amiibo_initialized.Assign(1);
1280 settings.settings.appdata_initialized.Assign(0);
1281}
1282
1252u64 NfcDevice::GetHandle() const { 1283u64 NfcDevice::GetHandle() const {
1253 // Generate a handle based of the npad id 1284 // Generate a handle based of the npad id
1254 return static_cast<u64>(npad_id); 1285 return static_cast<u64>(npad_id);
diff --git a/src/core/hle/service/nfc/common/device.h b/src/core/hle/service/nfc/common/device.h
index 98e1945c1..6a37e8458 100644
--- a/src/core/hle/service/nfc/common/device.h
+++ b/src/core/hle/service/nfc/common/device.h
@@ -110,6 +110,8 @@ private:
110 void UpdateSettingsCrc(); 110 void UpdateSettingsCrc();
111 void UpdateRegisterInfoCrc(); 111 void UpdateRegisterInfoCrc();
112 112
113 void BuildAmiiboWithoutKeys();
114
113 bool is_controller_set{}; 115 bool is_controller_set{};
114 int callback_key; 116 int callback_key;
115 const Core::HID::NpadIdType npad_id; 117 const Core::HID::NpadIdType npad_id;
@@ -128,6 +130,7 @@ private:
128 bool is_data_moddified{}; 130 bool is_data_moddified{};
129 bool is_app_area_open{}; 131 bool is_app_area_open{};
130 bool is_plain_amiibo{}; 132 bool is_plain_amiibo{};
133 bool is_write_protected{};
131 NFP::MountTarget mount_target{NFP::MountTarget::None}; 134 NFP::MountTarget mount_target{NFP::MountTarget::None};
132 135
133 NFP::NTAG215File tag_data{}; 136 NFP::NTAG215File tag_data{};
diff --git a/src/input_common/drivers/joycon.cpp b/src/input_common/drivers/joycon.cpp
index 653862a72..b2b5677c8 100644
--- a/src/input_common/drivers/joycon.cpp
+++ b/src/input_common/drivers/joycon.cpp
@@ -291,9 +291,13 @@ Common::Input::NfcState Joycons::SupportsNfc(const PadIdentifier& identifier_) c
291 return Common::Input::NfcState::Success; 291 return Common::Input::NfcState::Success;
292}; 292};
293 293
294Common::Input::NfcState Joycons::WriteNfcData(const PadIdentifier& identifier_, 294Common::Input::NfcState Joycons::WriteNfcData(const PadIdentifier& identifier,
295 const std::vector<u8>& data) { 295 const std::vector<u8>& data) {
296 return Common::Input::NfcState::NotSupported; 296 auto handle = GetHandle(identifier);
297 if (handle->WriteNfcData(data) != Joycon::DriverResult::Success) {
298 return Common::Input::NfcState::WriteFailed;
299 }
300 return Common::Input::NfcState::Success;
297}; 301};
298 302
299Common::Input::DriverResult Joycons::SetPollingMode(const PadIdentifier& identifier, 303Common::Input::DriverResult Joycons::SetPollingMode(const PadIdentifier& identifier,
diff --git a/src/input_common/helpers/joycon_driver.cpp b/src/input_common/helpers/joycon_driver.cpp
index 83429a336..95106f16d 100644
--- a/src/input_common/helpers/joycon_driver.cpp
+++ b/src/input_common/helpers/joycon_driver.cpp
@@ -492,6 +492,26 @@ DriverResult JoyconDriver::SetRingConMode() {
492 return result; 492 return result;
493} 493}
494 494
495DriverResult JoyconDriver::WriteNfcData(std::span<const u8> data) {
496 std::scoped_lock lock{mutex};
497 disable_input_thread = true;
498
499 if (!supported_features.nfc) {
500 return DriverResult::NotSupported;
501 }
502 if (!nfc_protocol->IsEnabled()) {
503 return DriverResult::Disabled;
504 }
505 if (!amiibo_detected) {
506 return DriverResult::ErrorWritingData;
507 }
508
509 const auto result = nfc_protocol->WriteAmiibo(data);
510
511 disable_input_thread = false;
512 return result;
513}
514
495bool JoyconDriver::IsConnected() const { 515bool JoyconDriver::IsConnected() const {
496 std::scoped_lock lock{mutex}; 516 std::scoped_lock lock{mutex};
497 return is_connected.load(); 517 return is_connected.load();
diff --git a/src/input_common/helpers/joycon_driver.h b/src/input_common/helpers/joycon_driver.h
index 72a9e71dc..e9b2fccbb 100644
--- a/src/input_common/helpers/joycon_driver.h
+++ b/src/input_common/helpers/joycon_driver.h
@@ -49,6 +49,7 @@ public:
49 DriverResult SetIrMode(); 49 DriverResult SetIrMode();
50 DriverResult SetNfcMode(); 50 DriverResult SetNfcMode();
51 DriverResult SetRingConMode(); 51 DriverResult SetRingConMode();
52 DriverResult WriteNfcData(std::span<const u8> data);
52 53
53 void SetCallbacks(const JoyconCallbacks& callbacks); 54 void SetCallbacks(const JoyconCallbacks& callbacks);
54 55
diff --git a/src/input_common/helpers/joycon_protocol/joycon_types.h b/src/input_common/helpers/joycon_protocol/joycon_types.h
index 353dc744d..5007b0e18 100644
--- a/src/input_common/helpers/joycon_protocol/joycon_types.h
+++ b/src/input_common/helpers/joycon_protocol/joycon_types.h
@@ -23,6 +23,7 @@ constexpr std::array<u8, 8> DefaultVibrationBuffer{0x0, 0x1, 0x40, 0x40, 0x0, 0x
23 23
24using MacAddress = std::array<u8, 6>; 24using MacAddress = std::array<u8, 6>;
25using SerialNumber = std::array<u8, 15>; 25using SerialNumber = std::array<u8, 15>;
26using TagUUID = std::array<u8, 7>;
26 27
27enum class ControllerType : u8 { 28enum class ControllerType : u8 {
28 None = 0x00, 29 None = 0x00,
@@ -276,12 +277,13 @@ enum class MCUPacketFlag : u8 {
276 LastCommandPacket = 0x08, 277 LastCommandPacket = 0x08,
277}; 278};
278 279
279enum class NFCReadCommand : u8 { 280enum class NFCCommand : u8 {
280 CancelAll = 0x00, 281 CancelAll = 0x00,
281 StartPolling = 0x01, 282 StartPolling = 0x01,
282 StopPolling = 0x02, 283 StopPolling = 0x02,
283 StartWaitingRecieve = 0x04, 284 StartWaitingRecieve = 0x04,
284 Ntag = 0x06, 285 ReadNtag = 0x06,
286 WriteNtag = 0x08,
285 Mifare = 0x0F, 287 Mifare = 0x0F,
286}; 288};
287 289
@@ -292,14 +294,19 @@ enum class NFCTagType : u8 {
292 294
293enum class NFCPages { 295enum class NFCPages {
294 Block0 = 0, 296 Block0 = 0,
297 Block3 = 3,
295 Block45 = 45, 298 Block45 = 45,
296 Block135 = 135, 299 Block135 = 135,
297 Block231 = 231, 300 Block231 = 231,
298}; 301};
299 302
300enum class NFCStatus : u8 { 303enum class NFCStatus : u8 {
304 Ready = 0x00,
305 Polling = 0x01,
301 LastPackage = 0x04, 306 LastPackage = 0x04,
307 WriteDone = 0x05,
302 TagLost = 0x07, 308 TagLost = 0x07,
309 WriteReady = 0x09,
303}; 310};
304 311
305enum class IrsMode : u8 { 312enum class IrsMode : u8 {
@@ -559,13 +566,32 @@ static_assert(sizeof(NFCReadBlockCommand) == 0x9, "NFCReadBlockCommand is an inv
559struct NFCReadCommandData { 566struct NFCReadCommandData {
560 u8 unknown; 567 u8 unknown;
561 u8 uuid_length; 568 u8 uuid_length;
562 u8 unknown_2; 569 TagUUID uid;
563 std::array<u8, 6> uid;
564 NFCTagType tag_type; 570 NFCTagType tag_type;
565 NFCReadBlockCommand read_block; 571 NFCReadBlockCommand read_block;
566}; 572};
567static_assert(sizeof(NFCReadCommandData) == 0x13, "NFCReadCommandData is an invalid size"); 573static_assert(sizeof(NFCReadCommandData) == 0x13, "NFCReadCommandData is an invalid size");
568 574
575#pragma pack(push, 1)
576struct NFCWriteCommandData {
577 u8 unknown;
578 u8 uuid_length;
579 TagUUID uid;
580 NFCTagType tag_type;
581 u8 unknown2;
582 u8 unknown3;
583 u8 unknown4;
584 u8 unknown5;
585 u8 unknown6;
586 u8 unknown7;
587 u8 unknown8;
588 u8 magic;
589 u16_be write_count;
590 u8 amiibo_version;
591};
592static_assert(sizeof(NFCWriteCommandData) == 0x15, "NFCWriteCommandData is an invalid size");
593#pragma pack(pop)
594
569struct NFCPollingCommandData { 595struct NFCPollingCommandData {
570 u8 enable_mifare; 596 u8 enable_mifare;
571 u8 unknown_1; 597 u8 unknown_1;
@@ -576,8 +602,8 @@ struct NFCPollingCommandData {
576static_assert(sizeof(NFCPollingCommandData) == 0x05, "NFCPollingCommandData is an invalid size"); 602static_assert(sizeof(NFCPollingCommandData) == 0x05, "NFCPollingCommandData is an invalid size");
577 603
578struct NFCRequestState { 604struct NFCRequestState {
579 NFCReadCommand command_argument; 605 NFCCommand command_argument;
580 INSERT_PADDING_BYTES(0x1); 606 u8 block_id;
581 u8 packet_id; 607 u8 packet_id;
582 MCUPacketFlag packet_flag; 608 MCUPacketFlag packet_flag;
583 u8 data_length; 609 u8 data_length;
@@ -591,6 +617,18 @@ struct NFCRequestState {
591}; 617};
592static_assert(sizeof(NFCRequestState) == 0x26, "NFCRequestState is an invalid size"); 618static_assert(sizeof(NFCRequestState) == 0x26, "NFCRequestState is an invalid size");
593 619
620struct NFCDataChunk {
621 u8 nfc_page;
622 u8 data_size;
623 std::array<u8, 0xFF> data;
624};
625
626struct NFCWritePackage {
627 NFCWriteCommandData command_data;
628 u8 number_of_chunks;
629 std::array<NFCDataChunk, 4> data_chunks;
630};
631
594struct IrsConfigure { 632struct IrsConfigure {
595 MCUCommand command; 633 MCUCommand command;
596 MCUSubCommand sub_command; 634 MCUSubCommand sub_command;
diff --git a/src/input_common/helpers/joycon_protocol/nfc.cpp b/src/input_common/helpers/joycon_protocol/nfc.cpp
index 46c9e9489..3b7a628e5 100644
--- a/src/input_common/helpers/joycon_protocol/nfc.cpp
+++ b/src/input_common/helpers/joycon_protocol/nfc.cpp
@@ -34,6 +34,12 @@ DriverResult NfcProtocol::EnableNfc() {
34 34
35 result = ConfigureMCU(config); 35 result = ConfigureMCU(config);
36 } 36 }
37 if (result == DriverResult::Success) {
38 result = WaitSetMCUMode(ReportMode::NFC_IR_MODE_60HZ, MCUMode::NFC);
39 }
40 if (result == DriverResult::Success) {
41 result = WaitUntilNfcIs(NFCStatus::Ready);
42 }
37 43
38 return result; 44 return result;
39} 45}
@@ -56,27 +62,20 @@ DriverResult NfcProtocol::StartNFCPollingMode() {
56 LOG_DEBUG(Input, "Start NFC pooling Mode"); 62 LOG_DEBUG(Input, "Start NFC pooling Mode");
57 ScopedSetBlocking sb(this); 63 ScopedSetBlocking sb(this);
58 DriverResult result{DriverResult::Success}; 64 DriverResult result{DriverResult::Success};
59 TagFoundData tag_data{};
60 65
61 if (result == DriverResult::Success) { 66 if (result == DriverResult::Success) {
62 result = WaitSetMCUMode(ReportMode::NFC_IR_MODE_60HZ, MCUMode::NFC);
63 }
64 if (result == DriverResult::Success) {
65 result = WaitUntilNfcIsReady();
66 }
67 if (result == DriverResult::Success) {
68 MCUCommandResponse output{}; 67 MCUCommandResponse output{};
69 result = SendStopPollingRequest(output); 68 result = SendStopPollingRequest(output);
70 } 69 }
71 if (result == DriverResult::Success) { 70 if (result == DriverResult::Success) {
72 result = WaitUntilNfcIsReady(); 71 result = WaitUntilNfcIs(NFCStatus::Ready);
73 } 72 }
74 if (result == DriverResult::Success) { 73 if (result == DriverResult::Success) {
75 MCUCommandResponse output{}; 74 MCUCommandResponse output{};
76 result = SendStartPollingRequest(output); 75 result = SendStartPollingRequest(output);
77 } 76 }
78 if (result == DriverResult::Success) { 77 if (result == DriverResult::Success) {
79 result = WaitUntilNfcIsPolling(); 78 result = WaitUntilNfcIs(NFCStatus::Polling);
80 } 79 }
81 if (result == DriverResult::Success) { 80 if (result == DriverResult::Success) {
82 is_enabled = true; 81 is_enabled = true;
@@ -112,6 +111,49 @@ DriverResult NfcProtocol::ScanAmiibo(std::vector<u8>& data) {
112 return result; 111 return result;
113} 112}
114 113
114DriverResult NfcProtocol::WriteAmiibo(std::span<const u8> data) {
115 LOG_DEBUG(Input, "Write amiibo");
116 ScopedSetBlocking sb(this);
117 DriverResult result{DriverResult::Success};
118 TagUUID tag_uuid = GetTagUUID(data);
119 TagFoundData tag_data{};
120
121 if (result == DriverResult::Success) {
122 result = IsTagInRange(tag_data, 7);
123 }
124 if (result == DriverResult::Success) {
125 if (tag_data.uuid != tag_uuid) {
126 result = DriverResult::InvalidParameters;
127 }
128 }
129 if (result == DriverResult::Success) {
130 MCUCommandResponse output{};
131 result = SendStopPollingRequest(output);
132 }
133 if (result == DriverResult::Success) {
134 result = WaitUntilNfcIs(NFCStatus::Ready);
135 }
136 if (result == DriverResult::Success) {
137 MCUCommandResponse output{};
138 result = SendStartPollingRequest(output, true);
139 }
140 if (result == DriverResult::Success) {
141 result = WaitUntilNfcIs(NFCStatus::WriteReady);
142 }
143 if (result == DriverResult::Success) {
144 result = WriteAmiiboData(tag_uuid, data);
145 }
146 if (result == DriverResult::Success) {
147 result = WaitUntilNfcIs(NFCStatus::WriteDone);
148 }
149 if (result == DriverResult::Success) {
150 MCUCommandResponse output{};
151 result = SendStopPollingRequest(output);
152 }
153
154 return result;
155}
156
115bool NfcProtocol::HasAmiibo() { 157bool NfcProtocol::HasAmiibo() {
116 if (update_counter++ < AMIIBO_UPDATE_DELAY) { 158 if (update_counter++ < AMIIBO_UPDATE_DELAY) {
117 return true; 159 return true;
@@ -129,7 +171,7 @@ bool NfcProtocol::HasAmiibo() {
129 return result == DriverResult::Success; 171 return result == DriverResult::Success;
130} 172}
131 173
132DriverResult NfcProtocol::WaitUntilNfcIsReady() { 174DriverResult NfcProtocol::WaitUntilNfcIs(NFCStatus status) {
133 constexpr std::size_t timeout_limit = 10; 175 constexpr std::size_t timeout_limit = 10;
134 MCUCommandResponse output{}; 176 MCUCommandResponse output{};
135 std::size_t tries = 0; 177 std::size_t tries = 0;
@@ -145,28 +187,7 @@ DriverResult NfcProtocol::WaitUntilNfcIsReady() {
145 } 187 }
146 } while (output.mcu_report != MCUReport::NFCState || 188 } while (output.mcu_report != MCUReport::NFCState ||
147 (output.mcu_data[1] << 8) + output.mcu_data[0] != 0x0500 || 189 (output.mcu_data[1] << 8) + output.mcu_data[0] != 0x0500 ||
148 output.mcu_data[5] != 0x31 || output.mcu_data[6] != 0x00); 190 output.mcu_data[5] != 0x31 || output.mcu_data[6] != static_cast<u8>(status));
149
150 return DriverResult::Success;
151}
152
153DriverResult NfcProtocol::WaitUntilNfcIsPolling() {
154 constexpr std::size_t timeout_limit = 10;
155 MCUCommandResponse output{};
156 std::size_t tries = 0;
157
158 do {
159 auto result = SendNextPackageRequest(output, {});
160
161 if (result != DriverResult::Success) {
162 return result;
163 }
164 if (tries++ > timeout_limit) {
165 return DriverResult::Timeout;
166 }
167 } while (output.mcu_report != MCUReport::NFCState ||
168 (output.mcu_data[1] << 8) + output.mcu_data[0] != 0x0500 ||
169 output.mcu_data[5] != 0x31 || output.mcu_data[6] != 0x01);
170 191
171 return DriverResult::Success; 192 return DriverResult::Success;
172} 193}
@@ -188,7 +209,7 @@ DriverResult NfcProtocol::IsTagInRange(TagFoundData& data, std::size_t timeout_l
188 (output.mcu_data[6] != 0x09 && output.mcu_data[6] != 0x04)); 209 (output.mcu_data[6] != 0x09 && output.mcu_data[6] != 0x04));
189 210
190 data.type = output.mcu_data[12]; 211 data.type = output.mcu_data[12];
191 data.uuid.resize(output.mcu_data[14]); 212 data.uuid_size = std::min(output.mcu_data[14], static_cast<u8>(sizeof(TagUUID)));
192 memcpy(data.uuid.data(), output.mcu_data.data() + 15, data.uuid.size()); 213 memcpy(data.uuid.data(), output.mcu_data.data() + 15, data.uuid.size());
193 214
194 return DriverResult::Success; 215 return DriverResult::Success;
@@ -245,17 +266,94 @@ DriverResult NfcProtocol::GetAmiiboData(std::vector<u8>& ntag_data) {
245 return DriverResult::Timeout; 266 return DriverResult::Timeout;
246} 267}
247 268
248DriverResult NfcProtocol::SendStartPollingRequest(MCUCommandResponse& output) { 269DriverResult NfcProtocol::WriteAmiiboData(const TagUUID& tag_uuid, std::span<const u8> data) {
270 constexpr std::size_t timeout_limit = 60;
271 const auto nfc_data = MakeAmiiboWritePackage(tag_uuid, data);
272 const std::vector<u8> nfc_buffer_data = SerializeWritePackage(nfc_data);
273 std::span<const u8> buffer(nfc_buffer_data);
274 MCUCommandResponse output{};
275 u8 block_id = 1;
276 u8 package_index = 0;
277 std::size_t tries = 0;
278 std::size_t current_position = 0;
279
280 LOG_INFO(Input, "Writing amiibo data");
281
282 auto result = SendWriteAmiiboRequest(output, tag_uuid);
283
284 if (result != DriverResult::Success) {
285 return result;
286 }
287
288 // Read Tag data but ignore the actual sent data
289 while (tries++ < timeout_limit) {
290 result = SendNextPackageRequest(output, package_index);
291 const auto nfc_status = static_cast<NFCStatus>(output.mcu_data[6]);
292
293 if (result != DriverResult::Success) {
294 return result;
295 }
296
297 if ((output.mcu_report == MCUReport::NFCReadData ||
298 output.mcu_report == MCUReport::NFCState) &&
299 nfc_status == NFCStatus::TagLost) {
300 return DriverResult::ErrorReadingData;
301 }
302
303 if (output.mcu_report == MCUReport::NFCReadData && output.mcu_data[1] == 0x07) {
304 package_index++;
305 continue;
306 }
307
308 if (output.mcu_report == MCUReport::NFCState && nfc_status == NFCStatus::LastPackage) {
309 LOG_INFO(Input, "Finished reading amiibo");
310 break;
311 }
312 }
313
314 // Send Data. Nfc buffer size is 31, Send the data in smaller packages
315 while (current_position < buffer.size() && tries++ < timeout_limit) {
316 const std::size_t next_position =
317 std::min(current_position + sizeof(NFCRequestState::raw_data), buffer.size());
318 const std::size_t block_size = next_position - current_position;
319 const bool is_last_packet = block_size < sizeof(NFCRequestState::raw_data);
320
321 SendWriteDataAmiiboRequest(output, block_id, is_last_packet,
322 buffer.subspan(current_position, block_size));
323
324 const auto nfc_status = static_cast<NFCStatus>(output.mcu_data[6]);
325
326 if ((output.mcu_report == MCUReport::NFCReadData ||
327 output.mcu_report == MCUReport::NFCState) &&
328 nfc_status == NFCStatus::TagLost) {
329 return DriverResult::ErrorReadingData;
330 }
331
332 // Increase position when data is confirmed by the joycon
333 if (output.mcu_report == MCUReport::NFCState &&
334 (output.mcu_data[1] << 8) + output.mcu_data[0] == 0x0500 &&
335 output.mcu_data[3] == block_id) {
336 block_id++;
337 current_position = next_position;
338 }
339 }
340
341 return result;
342}
343
344DriverResult NfcProtocol::SendStartPollingRequest(MCUCommandResponse& output,
345 bool is_second_attempt) {
249 NFCRequestState request{ 346 NFCRequestState request{
250 .command_argument = NFCReadCommand::StartPolling, 347 .command_argument = NFCCommand::StartPolling,
251 .packet_id = 0x0, 348 .block_id = {},
349 .packet_id = {},
252 .packet_flag = MCUPacketFlag::LastCommandPacket, 350 .packet_flag = MCUPacketFlag::LastCommandPacket,
253 .data_length = sizeof(NFCPollingCommandData), 351 .data_length = sizeof(NFCPollingCommandData),
254 .nfc_polling = 352 .nfc_polling =
255 { 353 {
256 .enable_mifare = 0x01, 354 .enable_mifare = 0x00,
257 .unknown_1 = 0x00, 355 .unknown_1 = static_cast<u8>(is_second_attempt ? 0xe8 : 0x00),
258 .unknown_2 = 0x00, 356 .unknown_2 = static_cast<u8>(is_second_attempt ? 0x03 : 0x00),
259 .unknown_3 = 0x2c, 357 .unknown_3 = 0x2c,
260 .unknown_4 = 0x01, 358 .unknown_4 = 0x01,
261 }, 359 },
@@ -271,10 +369,11 @@ DriverResult NfcProtocol::SendStartPollingRequest(MCUCommandResponse& output) {
271 369
272DriverResult NfcProtocol::SendStopPollingRequest(MCUCommandResponse& output) { 370DriverResult NfcProtocol::SendStopPollingRequest(MCUCommandResponse& output) {
273 NFCRequestState request{ 371 NFCRequestState request{
274 .command_argument = NFCReadCommand::StopPolling, 372 .command_argument = NFCCommand::StopPolling,
275 .packet_id = 0x0, 373 .block_id = {},
374 .packet_id = {},
276 .packet_flag = MCUPacketFlag::LastCommandPacket, 375 .packet_flag = MCUPacketFlag::LastCommandPacket,
277 .data_length = 0, 376 .data_length = {},
278 .raw_data = {}, 377 .raw_data = {},
279 .crc = {}, 378 .crc = {},
280 }; 379 };
@@ -288,10 +387,11 @@ DriverResult NfcProtocol::SendStopPollingRequest(MCUCommandResponse& output) {
288 387
289DriverResult NfcProtocol::SendNextPackageRequest(MCUCommandResponse& output, u8 packet_id) { 388DriverResult NfcProtocol::SendNextPackageRequest(MCUCommandResponse& output, u8 packet_id) {
290 NFCRequestState request{ 389 NFCRequestState request{
291 .command_argument = NFCReadCommand::StartWaitingRecieve, 390 .command_argument = NFCCommand::StartWaitingRecieve,
391 .block_id = {},
292 .packet_id = packet_id, 392 .packet_id = packet_id,
293 .packet_flag = MCUPacketFlag::LastCommandPacket, 393 .packet_flag = MCUPacketFlag::LastCommandPacket,
294 .data_length = 0, 394 .data_length = {},
295 .raw_data = {}, 395 .raw_data = {},
296 .crc = {}, 396 .crc = {},
297 }; 397 };
@@ -305,17 +405,17 @@ DriverResult NfcProtocol::SendNextPackageRequest(MCUCommandResponse& output, u8
305 405
306DriverResult NfcProtocol::SendReadAmiiboRequest(MCUCommandResponse& output, NFCPages ntag_pages) { 406DriverResult NfcProtocol::SendReadAmiiboRequest(MCUCommandResponse& output, NFCPages ntag_pages) {
307 NFCRequestState request{ 407 NFCRequestState request{
308 .command_argument = NFCReadCommand::Ntag, 408 .command_argument = NFCCommand::ReadNtag,
309 .packet_id = 0x0, 409 .block_id = {},
410 .packet_id = {},
310 .packet_flag = MCUPacketFlag::LastCommandPacket, 411 .packet_flag = MCUPacketFlag::LastCommandPacket,
311 .data_length = sizeof(NFCReadCommandData), 412 .data_length = sizeof(NFCReadCommandData),
312 .nfc_read = 413 .nfc_read =
313 { 414 {
314 .unknown = 0xd0, 415 .unknown = 0xd0,
315 .uuid_length = 0x07, 416 .uuid_length = sizeof(NFCReadCommandData::uid),
316 .unknown_2 = 0x00,
317 .uid = {}, 417 .uid = {},
318 .tag_type = NFCTagType::AllTags, 418 .tag_type = NFCTagType::Ntag215,
319 .read_block = GetReadBlockCommand(ntag_pages), 419 .read_block = GetReadBlockCommand(ntag_pages),
320 }, 420 },
321 .crc = {}, 421 .crc = {},
@@ -328,12 +428,135 @@ DriverResult NfcProtocol::SendReadAmiiboRequest(MCUCommandResponse& output, NFCP
328 output); 428 output);
329} 429}
330 430
431DriverResult NfcProtocol::SendWriteAmiiboRequest(MCUCommandResponse& output,
432 const TagUUID& tag_uuid) {
433 NFCRequestState request{
434 .command_argument = NFCCommand::ReadNtag,
435 .block_id = {},
436 .packet_id = {},
437 .packet_flag = MCUPacketFlag::LastCommandPacket,
438 .data_length = sizeof(NFCReadCommandData),
439 .nfc_read =
440 {
441 .unknown = 0xd0,
442 .uuid_length = sizeof(NFCReadCommandData::uid),
443 .uid = tag_uuid,
444 .tag_type = NFCTagType::Ntag215,
445 .read_block = GetReadBlockCommand(NFCPages::Block3),
446 },
447 .crc = {},
448 };
449
450 std::array<u8, sizeof(NFCRequestState)> request_data{};
451 memcpy(request_data.data(), &request, sizeof(NFCRequestState));
452 request_data[36] = CalculateMCU_CRC8(request_data.data(), 36);
453 return SendMCUData(ReportMode::NFC_IR_MODE_60HZ, MCUSubCommand::ReadDeviceMode, request_data,
454 output);
455}
456
457DriverResult NfcProtocol::SendWriteDataAmiiboRequest(MCUCommandResponse& output, u8 block_id,
458 bool is_last_packet,
459 std::span<const u8> data) {
460 const auto data_size = std::min(data.size(), sizeof(NFCRequestState::raw_data));
461 NFCRequestState request{
462 .command_argument = NFCCommand::WriteNtag,
463 .block_id = block_id,
464 .packet_id = {},
465 .packet_flag =
466 is_last_packet ? MCUPacketFlag::LastCommandPacket : MCUPacketFlag::MorePacketsRemaining,
467 .data_length = static_cast<u8>(data_size),
468 .raw_data = {},
469 .crc = {},
470 };
471 memcpy(request.raw_data.data(), data.data(), data_size);
472
473 std::array<u8, sizeof(NFCRequestState)> request_data{};
474 memcpy(request_data.data(), &request, sizeof(NFCRequestState));
475 request_data[36] = CalculateMCU_CRC8(request_data.data(), 36);
476 return SendMCUData(ReportMode::NFC_IR_MODE_60HZ, MCUSubCommand::ReadDeviceMode, request_data,
477 output);
478}
479
480std::vector<u8> NfcProtocol::SerializeWritePackage(const NFCWritePackage& package) const {
481 const std::size_t header_size =
482 sizeof(NFCWriteCommandData) + sizeof(NFCWritePackage::number_of_chunks);
483 std::vector<u8> serialized_data(header_size);
484 std::size_t start_index = 0;
485
486 memcpy(serialized_data.data(), &package, header_size);
487 start_index += header_size;
488
489 for (const auto& data_chunk : package.data_chunks) {
490 const std::size_t chunk_size =
491 sizeof(NFCDataChunk::nfc_page) + sizeof(NFCDataChunk::data_size) + data_chunk.data_size;
492
493 serialized_data.resize(start_index + chunk_size);
494 memcpy(serialized_data.data() + start_index, &data_chunk, chunk_size);
495 start_index += chunk_size;
496 }
497
498 return serialized_data;
499}
500
501NFCWritePackage NfcProtocol::MakeAmiiboWritePackage(const TagUUID& tag_uuid,
502 std::span<const u8> data) const {
503 return {
504 .command_data{
505 .unknown = 0xd0,
506 .uuid_length = sizeof(NFCReadCommandData::uid),
507 .uid = tag_uuid,
508 .tag_type = NFCTagType::Ntag215,
509 .unknown2 = 0x00,
510 .unknown3 = 0x01,
511 .unknown4 = 0x04,
512 .unknown5 = 0xff,
513 .unknown6 = 0xff,
514 .unknown7 = 0xff,
515 .unknown8 = 0xff,
516 .magic = data[16],
517 .write_count = static_cast<u16>((data[17] << 8) + data[18]),
518 .amiibo_version = data[19],
519 },
520 .number_of_chunks = 3,
521 .data_chunks =
522 {
523 MakeAmiiboChunk(0x05, 0x20, data),
524 MakeAmiiboChunk(0x20, 0xf0, data),
525 MakeAmiiboChunk(0x5c, 0x98, data),
526 },
527 };
528}
529
530NFCDataChunk NfcProtocol::MakeAmiiboChunk(u8 page, u8 size, std::span<const u8> data) const {
531 constexpr u8 PAGE_SIZE = 4;
532
533 if (static_cast<std::size_t>(page * PAGE_SIZE) + size >= data.size()) {
534 return {};
535 }
536
537 NFCDataChunk chunk{
538 .nfc_page = page,
539 .data_size = size,
540 .data = {},
541 };
542 std::memcpy(chunk.data.data(), data.data() + (page * PAGE_SIZE), size);
543 return chunk;
544}
545
331NFCReadBlockCommand NfcProtocol::GetReadBlockCommand(NFCPages pages) const { 546NFCReadBlockCommand NfcProtocol::GetReadBlockCommand(NFCPages pages) const {
332 switch (pages) { 547 switch (pages) {
333 case NFCPages::Block0: 548 case NFCPages::Block0:
334 return { 549 return {
335 .block_count = 1, 550 .block_count = 1,
336 }; 551 };
552 case NFCPages::Block3:
553 return {
554 .block_count = 1,
555 .blocks =
556 {
557 NFCReadBlock{0x03, 0x03},
558 },
559 };
337 case NFCPages::Block45: 560 case NFCPages::Block45:
338 return { 561 return {
339 .block_count = 1, 562 .block_count = 1,
@@ -368,6 +591,17 @@ NFCReadBlockCommand NfcProtocol::GetReadBlockCommand(NFCPages pages) const {
368 }; 591 };
369} 592}
370 593
594TagUUID NfcProtocol::GetTagUUID(std::span<const u8> data) const {
595 if (data.size() < 10) {
596 return {};
597 }
598
599 // crc byte 3 is omitted in this operation
600 return {
601 data[0], data[1], data[2], data[4], data[5], data[6], data[7],
602 };
603}
604
371bool NfcProtocol::IsEnabled() const { 605bool NfcProtocol::IsEnabled() const {
372 return is_enabled; 606 return is_enabled;
373} 607}
diff --git a/src/input_common/helpers/joycon_protocol/nfc.h b/src/input_common/helpers/joycon_protocol/nfc.h
index c9e9af03f..eb58c427d 100644
--- a/src/input_common/helpers/joycon_protocol/nfc.h
+++ b/src/input_common/helpers/joycon_protocol/nfc.h
@@ -27,6 +27,8 @@ public:
27 27
28 DriverResult ScanAmiibo(std::vector<u8>& data); 28 DriverResult ScanAmiibo(std::vector<u8>& data);
29 29
30 DriverResult WriteAmiibo(std::span<const u8> data);
31
30 bool HasAmiibo(); 32 bool HasAmiibo();
31 33
32 bool IsEnabled() const; 34 bool IsEnabled() const;
@@ -37,18 +39,20 @@ private:
37 39
38 struct TagFoundData { 40 struct TagFoundData {
39 u8 type; 41 u8 type;
40 std::vector<u8> uuid; 42 u8 uuid_size;
43 TagUUID uuid;
41 }; 44 };
42 45
43 DriverResult WaitUntilNfcIsReady(); 46 DriverResult WaitUntilNfcIs(NFCStatus status);
44
45 DriverResult WaitUntilNfcIsPolling();
46 47
47 DriverResult IsTagInRange(TagFoundData& data, std::size_t timeout_limit = 1); 48 DriverResult IsTagInRange(TagFoundData& data, std::size_t timeout_limit = 1);
48 49
49 DriverResult GetAmiiboData(std::vector<u8>& data); 50 DriverResult GetAmiiboData(std::vector<u8>& data);
50 51
51 DriverResult SendStartPollingRequest(MCUCommandResponse& output); 52 DriverResult WriteAmiiboData(const TagUUID& tag_uuid, std::span<const u8> data);
53
54 DriverResult SendStartPollingRequest(MCUCommandResponse& output,
55 bool is_second_attempt = false);
52 56
53 DriverResult SendStopPollingRequest(MCUCommandResponse& output); 57 DriverResult SendStopPollingRequest(MCUCommandResponse& output);
54 58
@@ -56,8 +60,21 @@ private:
56 60
57 DriverResult SendReadAmiiboRequest(MCUCommandResponse& output, NFCPages ntag_pages); 61 DriverResult SendReadAmiiboRequest(MCUCommandResponse& output, NFCPages ntag_pages);
58 62
63 DriverResult SendWriteAmiiboRequest(MCUCommandResponse& output, const TagUUID& tag_uuid);
64
65 DriverResult SendWriteDataAmiiboRequest(MCUCommandResponse& output, u8 block_id,
66 bool is_last_packet, std::span<const u8> data);
67
68 std::vector<u8> SerializeWritePackage(const NFCWritePackage& package) const;
69
70 NFCWritePackage MakeAmiiboWritePackage(const TagUUID& tag_uuid, std::span<const u8> data) const;
71
72 NFCDataChunk MakeAmiiboChunk(u8 page, u8 size, std::span<const u8> data) const;
73
59 NFCReadBlockCommand GetReadBlockCommand(NFCPages pages) const; 74 NFCReadBlockCommand GetReadBlockCommand(NFCPages pages) const;
60 75
76 TagUUID GetTagUUID(std::span<const u8> data) const;
77
61 bool is_enabled{}; 78 bool is_enabled{};
62 std::size_t update_counter{}; 79 std::size_t update_counter{};
63}; 80};
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
index 442365a26..c2a0ee6f1 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
@@ -30,7 +30,7 @@ void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& hi
30 union { 30 union {
31 u64 insn; 31 u64 insn;
32 BitField<0, 8, IR::Reg> dest_reg; 32 BitField<0, 8, IR::Reg> dest_reg;
33 BitField<0, 8, IR::Reg> lo_bits_reg; 33 BitField<8, 8, IR::Reg> lo_bits_reg;
34 BitField<37, 2, MaxShift> max_shift; 34 BitField<37, 2, MaxShift> max_shift;
35 BitField<47, 1, u64> cc; 35 BitField<47, 1, u64> cc;
36 BitField<48, 2, u64> x_mode; 36 BitField<48, 2, u64> x_mode;
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index a0009a36f..308d013d6 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -246,10 +246,14 @@ add_library(video_core STATIC
246 texture_cache/util.h 246 texture_cache/util.h
247 textures/astc.h 247 textures/astc.h
248 textures/astc.cpp 248 textures/astc.cpp
249 textures/bcn.cpp
250 textures/bcn.h
249 textures/decoders.cpp 251 textures/decoders.cpp
250 textures/decoders.h 252 textures/decoders.h
251 textures/texture.cpp 253 textures/texture.cpp
252 textures/texture.h 254 textures/texture.h
255 textures/workers.cpp
256 textures/workers.h
253 transform_feedback.cpp 257 transform_feedback.cpp
254 transform_feedback.h 258 transform_feedback.h
255 video_core.cpp 259 video_core.cpp
@@ -275,7 +279,7 @@ add_library(video_core STATIC
275create_target_directory_groups(video_core) 279create_target_directory_groups(video_core)
276 280
277target_link_libraries(video_core PUBLIC common core) 281target_link_libraries(video_core PUBLIC common core)
278target_link_libraries(video_core PUBLIC glad shader_recompiler) 282target_link_libraries(video_core PUBLIC glad shader_recompiler stb)
279 283
280if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) 284if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32)
281 add_dependencies(video_core ffmpeg-build) 285 add_dependencies(video_core ffmpeg-build)
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 98756e4da..65494097b 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -30,8 +30,8 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
30 } 30 }
31 31
32 const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); 32 const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
33 const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB; 33 const s64 min_spacing_expected = device_memory - 1_GiB;
34 const s64 min_spacing_critical = device_memory - 1_GiB; 34 const s64 min_spacing_critical = device_memory - 512_MiB;
35 const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); 35 const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD);
36 const s64 min_vacancy_expected = (6 * mem_threshold) / 10; 36 const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
37 const s64 min_vacancy_critical = (3 * mem_threshold) / 10; 37 const s64 min_vacancy_critical = (3 * mem_threshold) / 10;
@@ -1664,7 +1664,7 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
1664 // cbufs, which do not store the sizes adjacent to the addresses, so use the fully 1664 // cbufs, which do not store the sizes adjacent to the addresses, so use the fully
1665 // mapped buffer size for now. 1665 // mapped buffer size for now.
1666 const u32 memory_layout_size = static_cast<u32>(gpu_memory->GetMemoryLayoutSize(gpu_addr)); 1666 const u32 memory_layout_size = static_cast<u32>(gpu_memory->GetMemoryLayoutSize(gpu_addr));
1667 return memory_layout_size; 1667 return std::min(memory_layout_size, static_cast<u32>(8_MiB));
1668 }(); 1668 }();
1669 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1669 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1670 if (!cpu_addr || size == 0) { 1670 if (!cpu_addr || size == 0) {
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 31118886f..1e0823836 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -233,6 +233,8 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
233 const VideoCommon::ImageInfo& info) { 233 const VideoCommon::ImageInfo& info) {
234 if (IsPixelFormatASTC(info.format) && info.size.depth == 1 && !runtime.HasNativeASTC()) { 234 if (IsPixelFormatASTC(info.format) && info.size.depth == 1 && !runtime.HasNativeASTC()) {
235 return Settings::values.accelerate_astc.GetValue() && 235 return Settings::values.accelerate_astc.GetValue() &&
236 Settings::values.astc_recompression.GetValue() ==
237 Settings::AstcRecompression::Uncompressed &&
236 !Settings::values.async_astc.GetValue(); 238 !Settings::values.async_astc.GetValue();
237 } 239 }
238 // Disable other accelerated uploads for now as they don't implement swizzled uploads 240 // Disable other accelerated uploads for now as they don't implement swizzled uploads
@@ -437,6 +439,19 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form
437 return GL_R32UI; 439 return GL_R32UI;
438} 440}
439 441
442[[nodiscard]] GLenum SelectAstcFormat(PixelFormat format, bool is_srgb) {
443 switch (Settings::values.astc_recompression.GetValue()) {
444 case Settings::AstcRecompression::Bc1:
445 return is_srgb ? GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT : GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
446 break;
447 case Settings::AstcRecompression::Bc3:
448 return is_srgb ? GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT : GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
449 break;
450 default:
451 return is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
452 }
453}
454
440} // Anonymous namespace 455} // Anonymous namespace
441 456
442ImageBufferMap::~ImageBufferMap() { 457ImageBufferMap::~ImageBufferMap() {
@@ -739,9 +754,16 @@ Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_,
739 if (IsConverted(runtime->device, info.format, info.type)) { 754 if (IsConverted(runtime->device, info.format, info.type)) {
740 flags |= ImageFlagBits::Converted; 755 flags |= ImageFlagBits::Converted;
741 flags |= ImageFlagBits::CostlyLoad; 756 flags |= ImageFlagBits::CostlyLoad;
742 gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; 757
758 const bool is_srgb = IsPixelFormatSRGB(info.format);
759 gl_internal_format = is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
743 gl_format = GL_RGBA; 760 gl_format = GL_RGBA;
744 gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; 761 gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
762
763 if (IsPixelFormatASTC(info.format)) {
764 gl_internal_format = SelectAstcFormat(info.format, is_srgb);
765 gl_format = GL_NONE;
766 }
745 } else { 767 } else {
746 const auto& tuple = MaxwellToGL::GetFormatTuple(info.format); 768 const auto& tuple = MaxwellToGL::GetFormatTuple(info.format);
747 gl_internal_format = tuple.internal_format; 769 gl_internal_format = tuple.internal_format;
@@ -1130,7 +1152,12 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
1130 views{runtime.null_image_views} { 1152 views{runtime.null_image_views} {
1131 const Device& device = runtime.device; 1153 const Device& device = runtime.device;
1132 if (True(image.flags & ImageFlagBits::Converted)) { 1154 if (True(image.flags & ImageFlagBits::Converted)) {
1133 internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; 1155 const bool is_srgb = IsPixelFormatSRGB(info.format);
1156 internal_format = is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
1157
1158 if (IsPixelFormatASTC(info.format)) {
1159 internal_format = SelectAstcFormat(info.format, is_srgb);
1160 }
1134 } else { 1161 } else {
1135 internal_format = MaxwellToGL::GetFormatTuple(format).internal_format; 1162 internal_format = MaxwellToGL::GetFormatTuple(format).internal_format;
1136 } 1163 }
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 1190999a8..3e9b3302b 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -144,6 +144,10 @@ public:
144 return state_tracker; 144 return state_tracker;
145 } 145 }
146 146
147 void BarrierFeedbackLoop() const noexcept {
148 // OpenGL does not require a barrier for attachment feedback loops.
149 }
150
147private: 151private:
148 struct StagingBuffers { 152 struct StagingBuffers {
149 explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); 153 explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 8853cf0f7..b75d7220d 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -6,6 +6,7 @@
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "common/settings.h"
9#include "video_core/engines/maxwell_3d.h" 10#include "video_core/engines/maxwell_3d.h"
10#include "video_core/renderer_vulkan/maxwell_to_vk.h" 11#include "video_core/renderer_vulkan/maxwell_to_vk.h"
11#include "video_core/surface.h" 12#include "video_core/surface.h"
@@ -237,14 +238,25 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with
237 PixelFormat pixel_format) { 238 PixelFormat pixel_format) {
238 ASSERT(static_cast<size_t>(pixel_format) < std::size(tex_format_tuples)); 239 ASSERT(static_cast<size_t>(pixel_format) < std::size(tex_format_tuples));
239 FormatTuple tuple = tex_format_tuples[static_cast<size_t>(pixel_format)]; 240 FormatTuple tuple = tex_format_tuples[static_cast<size_t>(pixel_format)];
240 // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively 241 // Transcode on hardware that doesn't support ASTC natively
241 if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { 242 if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
242 const bool is_srgb = with_srgb && VideoCore::Surface::IsPixelFormatSRGB(pixel_format); 243 const bool is_srgb = with_srgb && VideoCore::Surface::IsPixelFormatSRGB(pixel_format);
243 if (is_srgb) { 244
244 tuple.format = VK_FORMAT_A8B8G8R8_SRGB_PACK32; 245 switch (Settings::values.astc_recompression.GetValue()) {
245 } else { 246 case Settings::AstcRecompression::Uncompressed:
246 tuple.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32; 247 if (is_srgb) {
247 tuple.usage |= Storage; 248 tuple.format = VK_FORMAT_A8B8G8R8_SRGB_PACK32;
249 } else {
250 tuple.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32;
251 tuple.usage |= Storage;
252 }
253 break;
254 case Settings::AstcRecompression::Bc1:
255 tuple.format = is_srgb ? VK_FORMAT_BC1_RGBA_SRGB_BLOCK : VK_FORMAT_BC1_RGBA_UNORM_BLOCK;
256 break;
257 case Settings::AstcRecompression::Bc3:
258 tuple.format = is_srgb ? VK_FORMAT_BC3_SRGB_BLOCK : VK_FORMAT_BC3_UNORM_BLOCK;
259 break;
248 } 260 }
249 } 261 }
250 const bool attachable = (tuple.usage & Attachable) != 0; 262 const bool attachable = (tuple.usage & Attachable) != 0;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index f1bcd5cd6..506b78f08 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -481,12 +481,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
481 if constexpr (Spec::enabled_stages[4]) { 481 if constexpr (Spec::enabled_stages[4]) {
482 prepare_stage(4); 482 prepare_stage(4);
483 } 483 }
484 texture_cache.UpdateRenderTargets(false);
485 texture_cache.CheckFeedbackLoop(views);
484 ConfigureDraw(rescaling, render_area); 486 ConfigureDraw(rescaling, render_area);
485} 487}
486 488
487void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling, 489void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling,
488 const RenderAreaPushConstant& render_area) { 490 const RenderAreaPushConstant& render_area) {
489 texture_cache.UpdateRenderTargets(false);
490 scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); 491 scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
491 492
492 if (!is_built.load(std::memory_order::relaxed)) { 493 if (!is_built.load(std::memory_order::relaxed)) {
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
index 47c74e4d8..8b65aeaeb 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -10,11 +10,16 @@
10 10
11namespace Vulkan { 11namespace Vulkan {
12 12
13constexpr u64 FENCE_RESERVE_SIZE = 8;
14
13MasterSemaphore::MasterSemaphore(const Device& device_) : device(device_) { 15MasterSemaphore::MasterSemaphore(const Device& device_) : device(device_) {
14 if (!device.HasTimelineSemaphore()) { 16 if (!device.HasTimelineSemaphore()) {
15 static constexpr VkFenceCreateInfo fence_ci{ 17 static constexpr VkFenceCreateInfo fence_ci{
16 .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .pNext = nullptr, .flags = 0}; 18 .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .pNext = nullptr, .flags = 0};
17 fence = device.GetLogical().CreateFence(fence_ci); 19 free_queue.resize(FENCE_RESERVE_SIZE);
20 std::ranges::generate(free_queue,
21 [&] { return device.GetLogical().CreateFence(fence_ci); });
22 wait_thread = std::jthread([this](std::stop_token token) { WaitThread(token); });
18 return; 23 return;
19 } 24 }
20 25
@@ -167,16 +172,53 @@ VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphor
167 .pSignalSemaphores = &signal_semaphore, 172 .pSignalSemaphores = &signal_semaphore,
168 }; 173 };
169 174
175 auto fence = GetFreeFence();
170 auto result = device.GetGraphicsQueue().Submit(submit_info, *fence); 176 auto result = device.GetGraphicsQueue().Submit(submit_info, *fence);
171 177
172 if (result == VK_SUCCESS) { 178 if (result == VK_SUCCESS) {
179 std::scoped_lock lock{wait_mutex};
180 wait_queue.emplace(host_tick, std::move(fence));
181 wait_cv.notify_one();
182 }
183
184 return result;
185}
186
187void MasterSemaphore::WaitThread(std::stop_token token) {
188 while (!token.stop_requested()) {
189 u64 host_tick;
190 vk::Fence fence;
191 {
192 std::unique_lock lock{wait_mutex};
193 Common::CondvarWait(wait_cv, lock, token, [this] { return !wait_queue.empty(); });
194 if (token.stop_requested()) {
195 return;
196 }
197 std::tie(host_tick, fence) = std::move(wait_queue.front());
198 wait_queue.pop();
199 }
200
173 fence.Wait(); 201 fence.Wait();
174 fence.Reset(); 202 fence.Reset();
175 gpu_tick.store(host_tick); 203 gpu_tick.store(host_tick);
176 gpu_tick.notify_all(); 204 gpu_tick.notify_all();
205
206 std::scoped_lock lock{free_mutex};
207 free_queue.push_front(std::move(fence));
177 } 208 }
209}
178 210
179 return result; 211vk::Fence MasterSemaphore::GetFreeFence() {
212 std::scoped_lock lock{free_mutex};
213 if (free_queue.empty()) {
214 static constexpr VkFenceCreateInfo fence_ci{
215 .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .pNext = nullptr, .flags = 0};
216 return device.GetLogical().CreateFence(fence_ci);
217 }
218
219 auto fence = std::move(free_queue.back());
220 free_queue.pop_back();
221 return fence;
180} 222}
181 223
182} // namespace Vulkan 224} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index f2f61f781..1e7c90215 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -5,8 +5,10 @@
5 5
6#include <atomic> 6#include <atomic>
7#include <condition_variable> 7#include <condition_variable>
8#include <deque>
8#include <mutex> 9#include <mutex>
9#include <thread> 10#include <thread>
11#include <queue>
10 12
11#include "common/common_types.h" 13#include "common/common_types.h"
12#include "common/polyfill_thread.h" 14#include "common/polyfill_thread.h"
@@ -17,6 +19,8 @@ namespace Vulkan {
17class Device; 19class Device;
18 20
19class MasterSemaphore { 21class MasterSemaphore {
22 using Waitable = std::pair<u64, vk::Fence>;
23
20public: 24public:
21 explicit MasterSemaphore(const Device& device); 25 explicit MasterSemaphore(const Device& device);
22 ~MasterSemaphore(); 26 ~MasterSemaphore();
@@ -57,13 +61,22 @@ private:
57 VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, 61 VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
58 VkSemaphore wait_semaphore, u64 host_tick); 62 VkSemaphore wait_semaphore, u64 host_tick);
59 63
64 void WaitThread(std::stop_token token);
65
66 vk::Fence GetFreeFence();
67
60private: 68private:
61 const Device& device; ///< Device. 69 const Device& device; ///< Device.
62 vk::Fence fence; ///< Fence.
63 vk::Semaphore semaphore; ///< Timeline semaphore. 70 vk::Semaphore semaphore; ///< Timeline semaphore.
64 std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick. 71 std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick.
65 std::atomic<u64> current_tick{1}; ///< Current logical tick. 72 std::atomic<u64> current_tick{1}; ///< Current logical tick.
73 std::mutex wait_mutex;
74 std::mutex free_mutex;
75 std::condition_variable_any wait_cv;
76 std::queue<Waitable> wait_queue; ///< Queue for the fences to be waited on by the wait thread.
77 std::deque<vk::Fence> free_queue; ///< Holds available fences for submission.
66 std::jthread debug_thread; ///< Debug thread to workaround validation layer bugs. 78 std::jthread debug_thread; ///< Debug thread to workaround validation layer bugs.
79 std::jthread wait_thread; ///< Helper thread that waits for submitted fences.
67}; 80};
68 81
69} // namespace Vulkan 82} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 4d0481f2a..8711e2a87 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -861,6 +861,10 @@ VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) {
861 return *buffers[level]; 861 return *buffers[level];
862} 862}
863 863
// Vulkan handling of an attachment feedback loop: forwards to the scheduler and
// requests an outside-render-pass operation context.
// NOTE(review): presumably this ends the active render pass so later work is
// ordered after it - confirm against Scheduler.
864void TextureCacheRuntime::BarrierFeedbackLoop() {
865 scheduler.RequestOutsideRenderPassOperationContext();
866}
867
864void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, 868void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
865 std::span<const VideoCommon::ImageCopy> copies) { 869 std::span<const VideoCommon::ImageCopy> copies) {
866 std::vector<VkBufferImageCopy> vk_in_copies(copies.size()); 870 std::vector<VkBufferImageCopy> vk_in_copies(copies.size());
@@ -1268,7 +1272,9 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
1268 if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { 1272 if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
1269 if (Settings::values.async_astc.GetValue()) { 1273 if (Settings::values.async_astc.GetValue()) {
1270 flags |= VideoCommon::ImageFlagBits::AsynchronousDecode; 1274 flags |= VideoCommon::ImageFlagBits::AsynchronousDecode;
1271 } else if (Settings::values.accelerate_astc.GetValue() && info.size.depth == 1) { 1275 } else if (Settings::values.astc_recompression.GetValue() ==
1276 Settings::AstcRecompression::Uncompressed &&
1277 Settings::values.accelerate_astc.GetValue() && info.size.depth == 1) {
1272 flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; 1278 flags |= VideoCommon::ImageFlagBits::AcceleratedUpload;
1273 } 1279 }
1274 flags |= VideoCommon::ImageFlagBits::Converted; 1280 flags |= VideoCommon::ImageFlagBits::Converted;
@@ -1283,7 +1289,9 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
1283 .usage = VK_IMAGE_USAGE_STORAGE_BIT, 1289 .usage = VK_IMAGE_USAGE_STORAGE_BIT,
1284 }; 1290 };
1285 current_image = *original_image; 1291 current_image = *original_image;
1286 if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { 1292 if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported() &&
1293 Settings::values.astc_recompression.GetValue() ==
1294 Settings::AstcRecompression::Uncompressed) {
1287 const auto& device = runtime->device.GetLogical(); 1295 const auto& device = runtime->device.GetLogical();
1288 storage_image_views.reserve(info.resources.levels); 1296 storage_image_views.reserve(info.resources.levels);
1289 for (s32 level = 0; level < info.resources.levels; ++level) { 1297 for (s32 level = 0; level < info.resources.levels; ++level) {
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 4166b3d20..0f7a5ffd4 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -103,6 +103,8 @@ public:
103 103
104 [[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size); 104 [[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size);
105 105
106 void BarrierFeedbackLoop();
107
106 const Device& device; 108 const Device& device;
107 Scheduler& scheduler; 109 Scheduler& scheduler;
108 MemoryAllocator& memory_allocator; 110 MemoryAllocator& memory_allocator;
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index 91512022f..d79594ce5 100644
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -155,7 +155,7 @@ void ImageBase::CheckAliasState() {
155 flags &= ~ImageFlagBits::Alias; 155 flags &= ~ImageFlagBits::Alias;
156} 156}
157 157
158void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { 158bool AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
159 static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; 159 static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
160 ASSERT(lhs.info.type == rhs.info.type); 160 ASSERT(lhs.info.type == rhs.info.type);
161 std::optional<SubresourceBase> base; 161 std::optional<SubresourceBase> base;
@@ -169,7 +169,7 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
169 } 169 }
170 if (!base) { 170 if (!base) {
171 LOG_ERROR(HW_GPU, "Image alias should have been flipped"); 171 LOG_ERROR(HW_GPU, "Image alias should have been flipped");
172 return; 172 return false;
173 } 173 }
174 const PixelFormat lhs_format = lhs.info.format; 174 const PixelFormat lhs_format = lhs.info.format;
175 const PixelFormat rhs_format = rhs.info.format; 175 const PixelFormat rhs_format = rhs.info.format;
@@ -248,12 +248,13 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
248 } 248 }
249 ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty()); 249 ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty());
250 if (lhs_alias.copies.empty()) { 250 if (lhs_alias.copies.empty()) {
251 return; 251 return false;
252 } 252 }
253 lhs.aliased_images.push_back(std::move(lhs_alias)); 253 lhs.aliased_images.push_back(std::move(lhs_alias));
254 rhs.aliased_images.push_back(std::move(rhs_alias)); 254 rhs.aliased_images.push_back(std::move(rhs_alias));
255 lhs.flags &= ~ImageFlagBits::IsRescalable; 255 lhs.flags &= ~ImageFlagBits::IsRescalable;
256 rhs.flags &= ~ImageFlagBits::IsRescalable; 256 rhs.flags &= ~ImageFlagBits::IsRescalable;
257 return true;
257} 258}
258 259
259} // namespace VideoCommon 260} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 329396bb6..1b8a17ee8 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -142,6 +142,6 @@ struct ImageAllocBase {
142 std::vector<ImageId> images; 142 std::vector<ImageId> images;
143}; 143};
144 144
145void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id); 145bool AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id);
146 146
147} // namespace VideoCommon 147} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index b24086fce..2cf082c5d 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -49,8 +49,8 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
49 49
50 if constexpr (HAS_DEVICE_MEMORY_INFO) { 50 if constexpr (HAS_DEVICE_MEMORY_INFO) {
51 const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory()); 51 const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
52 const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB; 52 const s64 min_spacing_expected = device_memory - 1_GiB;
53 const s64 min_spacing_critical = device_memory - 1_GiB; 53 const s64 min_spacing_critical = device_memory - 512_MiB;
54 const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); 54 const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD);
55 const s64 min_vacancy_expected = (6 * mem_threshold) / 10; 55 const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
56 const s64 min_vacancy_critical = (3 * mem_threshold) / 10; 56 const s64 min_vacancy_critical = (3 * mem_threshold) / 10;
@@ -86,10 +86,12 @@ void TextureCache<P>::RunGarbageCollector() {
86 // used by the async decoder thread. 86 // used by the async decoder thread.
87 return false; 87 return false;
88 } 88 }
89 if (!aggressive_mode && True(image.flags & ImageFlagBits::CostlyLoad)) {
90 return false;
91 }
89 const bool must_download = 92 const bool must_download =
90 image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap); 93 image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap);
91 if (!high_priority_mode && 94 if (!high_priority_mode && must_download) {
92 (must_download || True(image.flags & ImageFlagBits::CostlyLoad))) {
93 return false; 95 return false;
94 } 96 }
95 if (must_download) { 97 if (must_download) {
@@ -137,7 +139,6 @@ void TextureCache<P>::TickFrame() {
137 TickAsyncDecode(); 139 TickAsyncDecode();
138 140
139 runtime.TickFrame(); 141 runtime.TickFrame();
140 critical_gc = 0;
141 ++frame_tick; 142 ++frame_tick;
142 143
143 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { 144 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
@@ -184,6 +185,42 @@ void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
184} 185}
185 186
186template <class P> 187template <class P>
188void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) {
189 const bool requires_barrier = [&] {
190 for (const auto& view : views) {
191 if (!view.id) {
192 continue;
193 }
194 auto& image_view = slot_image_views[view.id];
195
196 // Check color targets
197 for (const auto& ct_view_id : render_targets.color_buffer_ids) {
198 if (ct_view_id) {
199 auto& ct_view = slot_image_views[ct_view_id];
200 if (image_view.image_id == ct_view.image_id) {
201 return true;
202 }
203 }
204 }
205
206 // Check zeta target
207 if (render_targets.depth_buffer_id) {
208 auto& zt_view = slot_image_views[render_targets.depth_buffer_id];
209 if (image_view.image_id == zt_view.image_id) {
210 return true;
211 }
212 }
213 }
214
215 return false;
216 }();
217
218 if (requires_barrier) {
219 runtime.BarrierFeedbackLoop();
220 }
221}
222
223template <class P>
187typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { 224typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
188 if (index > channel_state->graphics_sampler_table.Limit()) { 225 if (index > channel_state->graphics_sampler_table.Limit()) {
189 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); 226 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
@@ -1274,17 +1311,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1274 const size_t size_bytes = CalculateGuestSizeInBytes(new_info); 1311 const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
1275 const bool broken_views = runtime.HasBrokenTextureViewFormats(); 1312 const bool broken_views = runtime.HasBrokenTextureViewFormats();
1276 const bool native_bgr = runtime.HasNativeBgr(); 1313 const bool native_bgr = runtime.HasNativeBgr();
1277 boost::container::small_vector<ImageId, 4> overlap_ids; 1314 join_overlap_ids.clear();
1278 std::unordered_set<ImageId> overlaps_found; 1315 join_overlaps_found.clear();
1279 boost::container::small_vector<ImageId, 4> left_aliased_ids; 1316 join_left_aliased_ids.clear();
1280 boost::container::small_vector<ImageId, 4> right_aliased_ids; 1317 join_right_aliased_ids.clear();
1281 std::unordered_set<ImageId> ignore_textures; 1318 join_ignore_textures.clear();
1282 boost::container::small_vector<ImageId, 4> bad_overlap_ids; 1319 join_bad_overlap_ids.clear();
1283 boost::container::small_vector<ImageId, 4> all_siblings; 1320 join_copies_to_do.clear();
1321 join_alias_indices.clear();
1284 const bool this_is_linear = info.type == ImageType::Linear; 1322 const bool this_is_linear = info.type == ImageType::Linear;
1285 const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { 1323 const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
1286 if (True(overlap.flags & ImageFlagBits::Remapped)) { 1324 if (True(overlap.flags & ImageFlagBits::Remapped)) {
1287 ignore_textures.insert(overlap_id); 1325 join_ignore_textures.insert(overlap_id);
1288 return; 1326 return;
1289 } 1327 }
1290 const bool overlap_is_linear = overlap.info.type == ImageType::Linear; 1328 const bool overlap_is_linear = overlap.info.type == ImageType::Linear;
@@ -1294,11 +1332,11 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1294 if (this_is_linear && overlap_is_linear) { 1332 if (this_is_linear && overlap_is_linear) {
1295 if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { 1333 if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
1296 // Alias linear images with the same pitch 1334 // Alias linear images with the same pitch
1297 left_aliased_ids.push_back(overlap_id); 1335 join_left_aliased_ids.push_back(overlap_id);
1298 } 1336 }
1299 return; 1337 return;
1300 } 1338 }
1301 overlaps_found.insert(overlap_id); 1339 join_overlaps_found.insert(overlap_id);
1302 static constexpr bool strict_size = true; 1340 static constexpr bool strict_size = true;
1303 const std::optional<OverlapResult> solution = ResolveOverlap( 1341 const std::optional<OverlapResult> solution = ResolveOverlap(
1304 new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); 1342 new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
@@ -1306,33 +1344,33 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1306 gpu_addr = solution->gpu_addr; 1344 gpu_addr = solution->gpu_addr;
1307 cpu_addr = solution->cpu_addr; 1345 cpu_addr = solution->cpu_addr;
1308 new_info.resources = solution->resources; 1346 new_info.resources = solution->resources;
1309 overlap_ids.push_back(overlap_id); 1347 join_overlap_ids.push_back(overlap_id);
1310 all_siblings.push_back(overlap_id); 1348 join_copies_to_do.emplace_back(JoinCopy{false, overlap_id});
1311 return; 1349 return;
1312 } 1350 }
1313 static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; 1351 static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
1314 const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); 1352 const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
1315 if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { 1353 if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
1316 left_aliased_ids.push_back(overlap_id); 1354 join_left_aliased_ids.push_back(overlap_id);
1317 overlap.flags |= ImageFlagBits::Alias; 1355 overlap.flags |= ImageFlagBits::Alias;
1318 all_siblings.push_back(overlap_id); 1356 join_copies_to_do.emplace_back(JoinCopy{true, overlap_id});
1319 } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, 1357 } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
1320 broken_views, native_bgr)) { 1358 broken_views, native_bgr)) {
1321 right_aliased_ids.push_back(overlap_id); 1359 join_right_aliased_ids.push_back(overlap_id);
1322 overlap.flags |= ImageFlagBits::Alias; 1360 overlap.flags |= ImageFlagBits::Alias;
1323 all_siblings.push_back(overlap_id); 1361 join_copies_to_do.emplace_back(JoinCopy{true, overlap_id});
1324 } else { 1362 } else {
1325 bad_overlap_ids.push_back(overlap_id); 1363 join_bad_overlap_ids.push_back(overlap_id);
1326 } 1364 }
1327 }; 1365 };
1328 ForEachImageInRegion(cpu_addr, size_bytes, region_check); 1366 ForEachImageInRegion(cpu_addr, size_bytes, region_check);
1329 const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) { 1367 const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) {
1330 if (!overlaps_found.contains(overlap_id)) { 1368 if (!join_overlaps_found.contains(overlap_id)) {
1331 if (True(overlap.flags & ImageFlagBits::Remapped)) { 1369 if (True(overlap.flags & ImageFlagBits::Remapped)) {
1332 ignore_textures.insert(overlap_id); 1370 join_ignore_textures.insert(overlap_id);
1333 } 1371 }
1334 if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) { 1372 if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) {
1335 ignore_textures.insert(overlap_id); 1373 join_ignore_textures.insert(overlap_id);
1336 } 1374 }
1337 } 1375 }
1338 }; 1376 };
@@ -1340,11 +1378,11 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1340 1378
1341 bool can_rescale = info.rescaleable; 1379 bool can_rescale = info.rescaleable;
1342 bool any_rescaled = false; 1380 bool any_rescaled = false;
1343 for (const ImageId sibling_id : all_siblings) { 1381 for (const auto& copy : join_copies_to_do) {
1344 if (!can_rescale) { 1382 if (!can_rescale) {
1345 break; 1383 break;
1346 } 1384 }
1347 Image& sibling = slot_images[sibling_id]; 1385 Image& sibling = slot_images[copy.id];
1348 can_rescale &= ImageCanRescale(sibling); 1386 can_rescale &= ImageCanRescale(sibling);
1349 any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled); 1387 any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled);
1350 } 1388 }
@@ -1352,13 +1390,13 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1352 can_rescale &= any_rescaled; 1390 can_rescale &= any_rescaled;
1353 1391
1354 if (can_rescale) { 1392 if (can_rescale) {
1355 for (const ImageId sibling_id : all_siblings) { 1393 for (const auto& copy : join_copies_to_do) {
1356 Image& sibling = slot_images[sibling_id]; 1394 Image& sibling = slot_images[copy.id];
1357 ScaleUp(sibling); 1395 ScaleUp(sibling);
1358 } 1396 }
1359 } else { 1397 } else {
1360 for (const ImageId sibling_id : all_siblings) { 1398 for (const auto& copy : join_copies_to_do) {
1361 Image& sibling = slot_images[sibling_id]; 1399 Image& sibling = slot_images[copy.id];
1362 ScaleDown(sibling); 1400 ScaleDown(sibling);
1363 } 1401 }
1364 } 1402 }
@@ -1370,7 +1408,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1370 new_image.flags |= ImageFlagBits::Sparse; 1408 new_image.flags |= ImageFlagBits::Sparse;
1371 } 1409 }
1372 1410
1373 for (const ImageId overlap_id : ignore_textures) { 1411 for (const ImageId overlap_id : join_ignore_textures) {
1374 Image& overlap = slot_images[overlap_id]; 1412 Image& overlap = slot_images[overlap_id];
1375 if (True(overlap.flags & ImageFlagBits::GpuModified)) { 1413 if (True(overlap.flags & ImageFlagBits::GpuModified)) {
1376 UNIMPLEMENTED(); 1414 UNIMPLEMENTED();
@@ -1391,14 +1429,60 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1391 ScaleDown(new_image); 1429 ScaleDown(new_image);
1392 } 1430 }
1393 1431
1394 std::ranges::sort(overlap_ids, [this](const ImageId lhs, const ImageId rhs) { 1432 std::ranges::sort(join_copies_to_do, [this](const JoinCopy& lhs, const JoinCopy& rhs) {
1395 const ImageBase& lhs_image = slot_images[lhs]; 1433 const ImageBase& lhs_image = slot_images[lhs.id];
1396 const ImageBase& rhs_image = slot_images[rhs]; 1434 const ImageBase& rhs_image = slot_images[rhs.id];
1397 return lhs_image.modification_tick < rhs_image.modification_tick; 1435 return lhs_image.modification_tick < rhs_image.modification_tick;
1398 }); 1436 });
1399 1437
1400 for (const ImageId overlap_id : overlap_ids) { 1438 ImageBase& new_image_base = new_image;
1401 Image& overlap = slot_images[overlap_id]; 1439 for (const ImageId aliased_id : join_right_aliased_ids) {
1440 ImageBase& aliased = slot_images[aliased_id];
1441 size_t alias_index = new_image_base.aliased_images.size();
1442 if (!AddImageAlias(new_image_base, aliased, new_image_id, aliased_id)) {
1443 continue;
1444 }
1445 join_alias_indices.emplace(aliased_id, alias_index);
1446 new_image.flags |= ImageFlagBits::Alias;
1447 }
1448 for (const ImageId aliased_id : join_left_aliased_ids) {
1449 ImageBase& aliased = slot_images[aliased_id];
1450 size_t alias_index = new_image_base.aliased_images.size();
1451 if (!AddImageAlias(aliased, new_image_base, aliased_id, new_image_id)) {
1452 continue;
1453 }
1454 join_alias_indices.emplace(aliased_id, alias_index);
1455 new_image.flags |= ImageFlagBits::Alias;
1456 }
1457 for (const ImageId aliased_id : join_bad_overlap_ids) {
1458 ImageBase& aliased = slot_images[aliased_id];
1459 aliased.overlapping_images.push_back(new_image_id);
1460 new_image.overlapping_images.push_back(aliased_id);
1461 if (aliased.info.resources.levels == 1 && aliased.info.block.depth == 0 &&
1462 aliased.overlapping_images.size() > 1) {
1463 aliased.flags |= ImageFlagBits::BadOverlap;
1464 }
1465 if (new_image.info.resources.levels == 1 && new_image.info.block.depth == 0 &&
1466 new_image.overlapping_images.size() > 1) {
1467 new_image.flags |= ImageFlagBits::BadOverlap;
1468 }
1469 }
1470
1471 for (const auto& copy_object : join_copies_to_do) {
1472 Image& overlap = slot_images[copy_object.id];
1473 if (copy_object.is_alias) {
1474 if (!overlap.IsSafeDownload()) {
1475 continue;
1476 }
1477 const auto alias_pointer = join_alias_indices.find(copy_object.id);
1478 if (alias_pointer == join_alias_indices.end()) {
1479 continue;
1480 }
1481 const AliasedImage& aliased = new_image.aliased_images[alias_pointer->second];
1482 CopyImage(new_image_id, aliased.id, aliased.copies);
1483 new_image.modification_tick = overlap.modification_tick;
1484 continue;
1485 }
1402 if (True(overlap.flags & ImageFlagBits::GpuModified)) { 1486 if (True(overlap.flags & ImageFlagBits::GpuModified)) {
1403 new_image.flags |= ImageFlagBits::GpuModified; 1487 new_image.flags |= ImageFlagBits::GpuModified;
1404 const auto& resolution = Settings::values.resolution_info; 1488 const auto& resolution = Settings::values.resolution_info;
@@ -1411,35 +1495,15 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1411 } else { 1495 } else {
1412 runtime.CopyImage(new_image, overlap, std::move(copies)); 1496 runtime.CopyImage(new_image, overlap, std::move(copies));
1413 } 1497 }
1498 new_image.modification_tick = overlap.modification_tick;
1414 } 1499 }
1415 if (True(overlap.flags & ImageFlagBits::Tracked)) { 1500 if (True(overlap.flags & ImageFlagBits::Tracked)) {
1416 UntrackImage(overlap, overlap_id); 1501 UntrackImage(overlap, copy_object.id);
1417 }
1418 UnregisterImage(overlap_id);
1419 DeleteImage(overlap_id);
1420 }
1421 ImageBase& new_image_base = new_image;
1422 for (const ImageId aliased_id : right_aliased_ids) {
1423 ImageBase& aliased = slot_images[aliased_id];
1424 AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
1425 new_image.flags |= ImageFlagBits::Alias;
1426 }
1427 for (const ImageId aliased_id : left_aliased_ids) {
1428 ImageBase& aliased = slot_images[aliased_id];
1429 AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
1430 new_image.flags |= ImageFlagBits::Alias;
1431 }
1432 for (const ImageId aliased_id : bad_overlap_ids) {
1433 ImageBase& aliased = slot_images[aliased_id];
1434 aliased.overlapping_images.push_back(new_image_id);
1435 new_image.overlapping_images.push_back(aliased_id);
1436 if (aliased.info.resources.levels == 1 && aliased.overlapping_images.size() > 1) {
1437 aliased.flags |= ImageFlagBits::BadOverlap;
1438 }
1439 if (new_image.info.resources.levels == 1 && new_image.overlapping_images.size() > 1) {
1440 new_image.flags |= ImageFlagBits::BadOverlap;
1441 } 1502 }
1503 UnregisterImage(copy_object.id);
1504 DeleteImage(copy_object.id);
1442 } 1505 }
1506
1443 RegisterImage(new_image_id); 1507 RegisterImage(new_image_id);
1444 return new_image_id; 1508 return new_image_id;
1445} 1509}
@@ -1469,7 +1533,7 @@ std::optional<typename TextureCache<P>::BlitImages> TextureCache<P>::GetBlitImag
1469 if (!copy.must_accelerate) { 1533 if (!copy.must_accelerate) {
1470 do { 1534 do {
1471 if (!src_id && !dst_id) { 1535 if (!src_id && !dst_id) {
1472 break; 1536 return std::nullopt;
1473 } 1537 }
1474 if (src_id && True(slot_images[src_id].flags & ImageFlagBits::GpuModified)) { 1538 if (src_id && True(slot_images[src_id].flags & ImageFlagBits::GpuModified)) {
1475 break; 1539 break;
@@ -1847,10 +1911,6 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
1847 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); 1911 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
1848 } 1912 }
1849 total_used_memory += Common::AlignUp(tentative_size, 1024); 1913 total_used_memory += Common::AlignUp(tentative_size, 1024);
1850 if (total_used_memory > critical_memory && critical_gc < GC_EMERGENCY_COUNTS) {
1851 RunGarbageCollector();
1852 critical_gc++;
1853 }
1854 image.lru_index = lru_cache.Insert(image_id, frame_tick); 1914 image.lru_index = lru_cache.Insert(image_id, frame_tick);
1855 1915
1856 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { 1916 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 0720494e5..3bfa92154 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -10,7 +10,9 @@
10#include <span> 10#include <span>
11#include <type_traits> 11#include <type_traits>
12#include <unordered_map> 12#include <unordered_map>
13#include <unordered_set>
13#include <vector> 14#include <vector>
15#include <boost/container/small_vector.hpp>
14#include <queue> 16#include <queue>
15 17
16#include "common/common_types.h" 18#include "common/common_types.h"
@@ -148,6 +150,9 @@ public:
148 /// Fill image_view_ids with the compute images in indices 150 /// Fill image_view_ids with the compute images in indices
149 void FillComputeImageViews(std::span<ImageViewInOut> views); 151 void FillComputeImageViews(std::span<ImageViewInOut> views);
150 152
153 /// Handle feedback loops during draws.
154 void CheckFeedbackLoop(std::span<const ImageViewInOut> views);
155
151 /// Get the sampler from the graphics descriptor table in the specified index 156 /// Get the sampler from the graphics descriptor table in the specified index
152 Sampler* GetGraphicsSampler(u32 index); 157 Sampler* GetGraphicsSampler(u32 index);
153 158
@@ -424,7 +429,6 @@ private:
424 u64 minimum_memory; 429 u64 minimum_memory;
425 u64 expected_memory; 430 u64 expected_memory;
426 u64 critical_memory; 431 u64 critical_memory;
427 size_t critical_gc;
428 432
429 struct BufferDownload { 433 struct BufferDownload {
430 GPUVAddr address; 434 GPUVAddr address;
@@ -474,6 +478,20 @@ private:
474 478
475 Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"}; 479 Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
476 std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes; 480 std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;
481
482 // Join caching
483 boost::container::small_vector<ImageId, 4> join_overlap_ids;
484 std::unordered_set<ImageId> join_overlaps_found;
485 boost::container::small_vector<ImageId, 4> join_left_aliased_ids;
486 boost::container::small_vector<ImageId, 4> join_right_aliased_ids;
487 std::unordered_set<ImageId> join_ignore_textures;
488 boost::container::small_vector<ImageId, 4> join_bad_overlap_ids;
489 struct JoinCopy {
490 bool is_alias;
491 ImageId id;
492 };
493 boost::container::small_vector<JoinCopy, 4> join_copies_to_do;
494 std::unordered_map<ImageId, size_t> join_alias_indices;
477}; 495};
478 496
479} // namespace VideoCommon 497} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index f1071aa23..95a5b47d8 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -18,6 +18,8 @@
18#include "common/bit_util.h" 18#include "common/bit_util.h"
19#include "common/common_types.h" 19#include "common/common_types.h"
20#include "common/div_ceil.h" 20#include "common/div_ceil.h"
21#include "common/scratch_buffer.h"
22#include "common/settings.h"
21#include "video_core/compatible_formats.h" 23#include "video_core/compatible_formats.h"
22#include "video_core/engines/maxwell_3d.h" 24#include "video_core/engines/maxwell_3d.h"
23#include "video_core/memory_manager.h" 25#include "video_core/memory_manager.h"
@@ -28,6 +30,7 @@
28#include "video_core/texture_cache/samples_helper.h" 30#include "video_core/texture_cache/samples_helper.h"
29#include "video_core/texture_cache/util.h" 31#include "video_core/texture_cache/util.h"
30#include "video_core/textures/astc.h" 32#include "video_core/textures/astc.h"
33#include "video_core/textures/bcn.h"
31#include "video_core/textures/decoders.h" 34#include "video_core/textures/decoders.h"
32 35
33namespace VideoCommon { 36namespace VideoCommon {
@@ -120,7 +123,9 @@ template <u32 GOB_EXTENT>
120 return { 123 return {
121 .width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level), 124 .width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level),
122 .height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level), 125 .height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level),
123 .depth = AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level), 126 .depth = level == 0
127 ? block_size.depth
128 : AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level),
124 }; 129 };
125} 130}
126 131
@@ -162,6 +167,13 @@ template <u32 GOB_EXTENT>
162} 167}
163 168
164[[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) { 169[[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) {
170 if (level == 0) {
171 return Extent3D{
172 .width = info.block.width,
173 .height = info.block.height,
174 .depth = info.block.depth,
175 };
176 }
165 const Extent3D blocks = NumLevelBlocks(info, level); 177 const Extent3D blocks = NumLevelBlocks(info, level);
166 return Extent3D{ 178 return Extent3D{
167 .width = AdjustTileSize(info.block.width, GOB_SIZE_X, blocks.width), 179 .width = AdjustTileSize(info.block.width, GOB_SIZE_X, blocks.width),
@@ -585,6 +597,21 @@ u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept {
585 return info.size.width * BytesPerBlock(info.format); 597 return info.size.width * BytesPerBlock(info.format);
586 } 598 }
587 static constexpr Extent2D TILE_SIZE{1, 1}; 599 static constexpr Extent2D TILE_SIZE{1, 1};
600 if (IsPixelFormatASTC(info.format) && Settings::values.astc_recompression.GetValue() !=
601 Settings::AstcRecompression::Uncompressed) {
602 const u32 bpp_div =
603 Settings::values.astc_recompression.GetValue() == Settings::AstcRecompression::Bc1 ? 2
604 : 1;
605 // NumBlocksPerLayer doesn't account for this correctly, so we have to do it manually.
606 u32 output_size = 0;
607 for (s32 i = 0; i < info.resources.levels; i++) {
608 const auto mip_size = AdjustMipSize(info.size, i);
609 const u32 plane_dim =
610 Common::AlignUp(mip_size.width, 4U) * Common::AlignUp(mip_size.height, 4U);
611 output_size += (plane_dim * info.size.depth * info.resources.layers) / bpp_div;
612 }
613 return output_size;
614 }
588 return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK; 615 return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK;
589} 616}
590 617
@@ -885,6 +912,7 @@ BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
885void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, 912void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
886 std::span<BufferImageCopy> copies) { 913 std::span<BufferImageCopy> copies) {
887 u32 output_offset = 0; 914 u32 output_offset = 0;
915 Common::ScratchBuffer<u8> decode_scratch;
888 916
889 const Extent2D tile_size = DefaultBlockSize(info.format); 917 const Extent2D tile_size = DefaultBlockSize(info.format);
890 for (BufferImageCopy& copy : copies) { 918 for (BufferImageCopy& copy : copies) {
@@ -895,22 +923,58 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
895 ASSERT(copy.image_extent == mip_size); 923 ASSERT(copy.image_extent == mip_size);
896 ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width)); 924 ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width));
897 ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height)); 925 ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height));
898 if (IsPixelFormatASTC(info.format)) { 926
927 const auto input_offset = input.subspan(copy.buffer_offset);
928 copy.buffer_offset = output_offset;
929 copy.buffer_row_length = mip_size.width;
930 copy.buffer_image_height = mip_size.height;
931
932 const auto recompression_setting = Settings::values.astc_recompression.GetValue();
933 const bool astc = IsPixelFormatASTC(info.format);
934
935 if (astc && recompression_setting == Settings::AstcRecompression::Uncompressed) {
899 Tegra::Texture::ASTC::Decompress( 936 Tegra::Texture::ASTC::Decompress(
900 input.subspan(copy.buffer_offset), copy.image_extent.width, 937 input_offset, copy.image_extent.width, copy.image_extent.height,
901 copy.image_extent.height,
902 copy.image_subresource.num_layers * copy.image_extent.depth, tile_size.width, 938 copy.image_subresource.num_layers * copy.image_extent.depth, tile_size.width,
903 tile_size.height, output.subspan(output_offset)); 939 tile_size.height, output.subspan(output_offset));
940
941 output_offset += copy.image_extent.width * copy.image_extent.height *
942 copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
943 } else if (astc) {
944 // BC1 uses 0.5 bytes per texel
945 // BC3 uses 1 byte per texel
946 const auto compress = recompression_setting == Settings::AstcRecompression::Bc1
947 ? Tegra::Texture::BCN::CompressBC1
948 : Tegra::Texture::BCN::CompressBC3;
949 const auto bpp_div = recompression_setting == Settings::AstcRecompression::Bc1 ? 2 : 1;
950
951 const u32 plane_dim = copy.image_extent.width * copy.image_extent.height;
952 const u32 level_size = plane_dim * copy.image_extent.depth *
953 copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
954 decode_scratch.resize_destructive(level_size);
955
956 Tegra::Texture::ASTC::Decompress(
957 input_offset, copy.image_extent.width, copy.image_extent.height,
958 copy.image_subresource.num_layers * copy.image_extent.depth, tile_size.width,
959 tile_size.height, decode_scratch);
960
961 compress(decode_scratch, copy.image_extent.width, copy.image_extent.height,
962 copy.image_subresource.num_layers * copy.image_extent.depth,
963 output.subspan(output_offset));
964
965 const u32 aligned_plane_dim = Common::AlignUp(copy.image_extent.width, 4) *
966 Common::AlignUp(copy.image_extent.height, 4);
967
968 copy.buffer_size =
969 (aligned_plane_dim * copy.image_extent.depth * copy.image_subresource.num_layers) /
970 bpp_div;
971 output_offset += static_cast<u32>(copy.buffer_size);
904 } else { 972 } else {
905 DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent, 973 DecompressBC4(input_offset, copy.image_extent, output.subspan(output_offset));
906 output.subspan(output_offset));
907 }
908 copy.buffer_offset = output_offset;
909 copy.buffer_row_length = mip_size.width;
910 copy.buffer_image_height = mip_size.height;
911 974
912 output_offset += copy.image_extent.width * copy.image_extent.height * 975 output_offset += copy.image_extent.width * copy.image_extent.height *
913 copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK; 976 copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
977 }
914 } 978 }
915} 979}
916 980
@@ -1233,7 +1297,9 @@ u32 MapSizeBytes(const ImageBase& image) {
1233 1297
1234static_assert(CalculateLevelSize(LevelInfo{{1920, 1080, 1}, {0, 2, 0}, {1, 1}, 2, 0}, 0) == 1298static_assert(CalculateLevelSize(LevelInfo{{1920, 1080, 1}, {0, 2, 0}, {1, 1}, 2, 0}, 0) ==
1235 0x7f8000); 1299 0x7f8000);
1236static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x4000); 1300static_assert(CalculateLevelSize(LevelInfo{{32, 32, 1}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x40000);
1301
1302static_assert(CalculateLevelSize(LevelInfo{{128, 8, 1}, {0, 4, 0}, {1, 1}, 4, 0}, 0) == 0x40000);
1237 1303
1238static_assert(CalculateLevelOffset(PixelFormat::R8_SINT, {1920, 1080, 1}, {0, 2, 0}, 0, 7) == 1304static_assert(CalculateLevelOffset(PixelFormat::R8_SINT, {1920, 1080, 1}, {0, 2, 0}, 0, 7) ==
1239 0x2afc00); 1305 0x2afc00);
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index a68bc0d77..fef0be31d 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -16,8 +16,8 @@
16#include "common/alignment.h" 16#include "common/alignment.h"
17#include "common/common_types.h" 17#include "common/common_types.h"
18#include "common/polyfill_ranges.h" 18#include "common/polyfill_ranges.h"
19#include "common/thread_worker.h"
20#include "video_core/textures/astc.h" 19#include "video_core/textures/astc.h"
20#include "video_core/textures/workers.h"
21 21
22class InputBitStream { 22class InputBitStream {
23public: 23public:
@@ -1656,8 +1656,7 @@ void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height,
1656 const u32 rows = Common::DivideUp(height, block_height); 1656 const u32 rows = Common::DivideUp(height, block_height);
1657 const u32 cols = Common::DivideUp(width, block_width); 1657 const u32 cols = Common::DivideUp(width, block_width);
1658 1658
1659 static Common::ThreadWorker workers{std::max(std::thread::hardware_concurrency(), 2U) / 2, 1659 Common::ThreadWorker& workers{GetThreadWorkers()};
1660 "ASTCDecompress"};
1661 1660
1662 for (u32 z = 0; z < depth; ++z) { 1661 for (u32 z = 0; z < depth; ++z) {
1663 const u32 depth_offset = z * height * width * 4; 1662 const u32 depth_offset = z * height * width * 4;
diff --git a/src/video_core/textures/bcn.cpp b/src/video_core/textures/bcn.cpp
new file mode 100644
index 000000000..671212a49
--- /dev/null
+++ b/src/video_core/textures/bcn.cpp
@@ -0,0 +1,87 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <stb_dxt.h>
5#include <string.h>
6
7#include "common/alignment.h"
8#include "video_core/textures/bcn.h"
9#include "video_core/textures/workers.h"
10
11namespace Tegra::Texture::BCN {
12
13using BCNCompressor = void(u8* block_output, const u8* block_input, bool any_alpha);
14
15template <u32 BytesPerBlock, bool ThresholdAlpha = false>
16void CompressBCN(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
17 std::span<uint8_t> output, BCNCompressor f) {
18 constexpr u8 alpha_threshold = 128;
19 constexpr u32 bytes_per_px = 4;
20 const u32 plane_dim = width * height;
21
22 Common::ThreadWorker& workers{GetThreadWorkers()};
23
24 for (u32 z = 0; z < depth; z++) {
25 for (u32 y = 0; y < height; y += 4) {
26 auto compress_row = [z, y, width, height, plane_dim, f, data, output]() {
27 for (u32 x = 0; x < width; x += 4) {
28 // Gather 4x4 block of RGBA texels
29 u8 input_colors[4][4][4];
30 bool any_alpha = false;
31
32 for (u32 j = 0; j < 4; j++) {
33 for (u32 i = 0; i < 4; i++) {
34 const size_t coord =
35 (z * plane_dim + (y + j) * width + (x + i)) * bytes_per_px;
36
37 if ((x + i < width) && (y + j < height)) {
38 if constexpr (ThresholdAlpha) {
39 if (data[coord + 3] >= alpha_threshold) {
40 input_colors[j][i][0] = data[coord + 0];
41 input_colors[j][i][1] = data[coord + 1];
42 input_colors[j][i][2] = data[coord + 2];
43 input_colors[j][i][3] = 255;
44 } else {
45 any_alpha = true;
46 memset(input_colors[j][i], 0, bytes_per_px);
47 }
48 } else {
49 memcpy(input_colors[j][i], &data[coord], bytes_per_px);
50 }
51 } else {
52 memset(input_colors[j][i], 0, bytes_per_px);
53 }
54 }
55 }
56
57 const u32 bytes_per_row = BytesPerBlock * Common::DivideUp(width, 4U);
58 const u32 bytes_per_plane = bytes_per_row * Common::DivideUp(height, 4U);
59 f(output.data() + z * bytes_per_plane + (y / 4) * bytes_per_row +
60 (x / 4) * BytesPerBlock,
61 reinterpret_cast<u8*>(input_colors), any_alpha);
62 }
63 };
64 workers.QueueWork(std::move(compress_row));
65 }
66 workers.WaitForRequests();
67 }
68}
69
70void CompressBC1(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
71 std::span<uint8_t> output) {
72 CompressBCN<8, true>(data, width, height, depth, output,
73 [](u8* block_output, const u8* block_input, bool any_alpha) {
74 stb_compress_bc1_block(block_output, block_input, any_alpha,
75 STB_DXT_NORMAL);
76 });
77}
78
79void CompressBC3(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
80 std::span<uint8_t> output) {
81 CompressBCN<16, false>(data, width, height, depth, output,
82 [](u8* block_output, const u8* block_input, bool any_alpha) {
83 stb_compress_bc3_block(block_output, block_input, STB_DXT_NORMAL);
84 });
85}
86
87} // namespace Tegra::Texture::BCN
diff --git a/src/video_core/textures/bcn.h b/src/video_core/textures/bcn.h
new file mode 100644
index 000000000..6464af885
--- /dev/null
+++ b/src/video_core/textures/bcn.h
@@ -0,0 +1,17 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <span>
7#include <stdint.h>
8
9namespace Tegra::Texture::BCN {
10
11void CompressBC1(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
12 std::span<uint8_t> output);
13
14void CompressBC3(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
15 std::span<uint8_t> output);
16
17} // namespace Tegra::Texture::BCN
diff --git a/src/video_core/textures/workers.cpp b/src/video_core/textures/workers.cpp
new file mode 100644
index 000000000..a71c305f4
--- /dev/null
+++ b/src/video_core/textures/workers.cpp
@@ -0,0 +1,15 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include "video_core/textures/workers.h"
5
6namespace Tegra::Texture {
7
8Common::ThreadWorker& GetThreadWorkers() {
9 static Common::ThreadWorker workers{std::max(std::thread::hardware_concurrency(), 2U) / 2,
10 "ImageTranscode"};
11
12 return workers;
13}
14
15} // namespace Tegra::Texture
diff --git a/src/video_core/textures/workers.h b/src/video_core/textures/workers.h
new file mode 100644
index 000000000..008dd05b3
--- /dev/null
+++ b/src/video_core/textures/workers.h
@@ -0,0 +1,12 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include "common/thread_worker.h"
7
8namespace Tegra::Texture {
9
10Common::ThreadWorker& GetThreadWorkers();
11
12}
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index c03f4a56b..aea677cb3 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -1002,6 +1002,11 @@ u64 Device::GetDeviceMemoryUsage() const {
1002} 1002}
1003 1003
1004void Device::CollectPhysicalMemoryInfo() { 1004void Device::CollectPhysicalMemoryInfo() {
1005 // Account for resolution scaling in memory limits
1006 const size_t normal_memory = 6_GiB;
1007 const size_t scaler_memory = 1_GiB * Settings::values.resolution_info.ScaleUp(1);
1008
1009 // Calculate limits using memory budget
1005 VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{}; 1010 VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{};
1006 budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT; 1011 budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
1007 const auto mem_info = 1012 const auto mem_info =
@@ -1031,11 +1036,12 @@ void Device::CollectPhysicalMemoryInfo() {
1031 if (!is_integrated) { 1036 if (!is_integrated) {
1032 const u64 reserve_memory = std::min<u64>(device_access_memory / 8, 1_GiB); 1037 const u64 reserve_memory = std::min<u64>(device_access_memory / 8, 1_GiB);
1033 device_access_memory -= reserve_memory; 1038 device_access_memory -= reserve_memory;
1039 device_access_memory = std::min<u64>(device_access_memory, normal_memory + scaler_memory);
1034 return; 1040 return;
1035 } 1041 }
1036 const s64 available_memory = static_cast<s64>(device_access_memory - device_initial_usage); 1042 const s64 available_memory = static_cast<s64>(device_access_memory - device_initial_usage);
1037 device_access_memory = static_cast<u64>(std::max<s64>( 1043 device_access_memory = static_cast<u64>(std::max<s64>(
1038 std::min<s64>(available_memory - 8_GiB, 4_GiB), static_cast<s64>(local_memory))); 1044 std::min<s64>(available_memory - 8_GiB, 4_GiB), std::min<s64>(local_memory, 4_GiB)));
1039} 1045}
1040 1046
1041void Device::CollectToolingInfo() { 1047void Device::CollectToolingInfo() {
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 70737c54e..662651196 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -711,6 +711,7 @@ void Config::ReadRendererValues() {
711 ReadGlobalSetting(Settings::values.nvdec_emulation); 711 ReadGlobalSetting(Settings::values.nvdec_emulation);
712 ReadGlobalSetting(Settings::values.accelerate_astc); 712 ReadGlobalSetting(Settings::values.accelerate_astc);
713 ReadGlobalSetting(Settings::values.async_astc); 713 ReadGlobalSetting(Settings::values.async_astc);
714 ReadGlobalSetting(Settings::values.astc_recompression);
714 ReadGlobalSetting(Settings::values.use_reactive_flushing); 715 ReadGlobalSetting(Settings::values.use_reactive_flushing);
715 ReadGlobalSetting(Settings::values.shader_backend); 716 ReadGlobalSetting(Settings::values.shader_backend);
716 ReadGlobalSetting(Settings::values.use_asynchronous_shaders); 717 ReadGlobalSetting(Settings::values.use_asynchronous_shaders);
@@ -1359,6 +1360,10 @@ void Config::SaveRendererValues() {
1359 Settings::values.nvdec_emulation.UsingGlobal()); 1360 Settings::values.nvdec_emulation.UsingGlobal());
1360 WriteGlobalSetting(Settings::values.accelerate_astc); 1361 WriteGlobalSetting(Settings::values.accelerate_astc);
1361 WriteGlobalSetting(Settings::values.async_astc); 1362 WriteGlobalSetting(Settings::values.async_astc);
1363 WriteSetting(QString::fromStdString(Settings::values.astc_recompression.GetLabel()),
1364 static_cast<u32>(Settings::values.astc_recompression.GetValue(global)),
1365 static_cast<u32>(Settings::values.astc_recompression.GetDefault()),
1366 Settings::values.astc_recompression.UsingGlobal());
1362 WriteGlobalSetting(Settings::values.use_reactive_flushing); 1367 WriteGlobalSetting(Settings::values.use_reactive_flushing);
1363 WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()), 1368 WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()),
1364 static_cast<u32>(Settings::values.shader_backend.GetValue(global)), 1369 static_cast<u32>(Settings::values.shader_backend.GetValue(global)),
diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h
index 7d26e9ab6..9cb9db6cf 100644
--- a/src/yuzu/configuration/config.h
+++ b/src/yuzu/configuration/config.h
@@ -208,3 +208,4 @@ Q_DECLARE_METATYPE(Settings::ScalingFilter);
208Q_DECLARE_METATYPE(Settings::AntiAliasing); 208Q_DECLARE_METATYPE(Settings::AntiAliasing);
209Q_DECLARE_METATYPE(Settings::RendererBackend); 209Q_DECLARE_METATYPE(Settings::RendererBackend);
210Q_DECLARE_METATYPE(Settings::ShaderBackend); 210Q_DECLARE_METATYPE(Settings::ShaderBackend);
211Q_DECLARE_METATYPE(Settings::AstcRecompression);
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index 1f3e489d0..896863f87 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -27,6 +27,7 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
27 ui->async_present->setEnabled(runtime_lock); 27 ui->async_present->setEnabled(runtime_lock);
28 ui->renderer_force_max_clock->setEnabled(runtime_lock); 28 ui->renderer_force_max_clock->setEnabled(runtime_lock);
29 ui->async_astc->setEnabled(runtime_lock); 29 ui->async_astc->setEnabled(runtime_lock);
30 ui->astc_recompression_combobox->setEnabled(runtime_lock);
30 ui->use_asynchronous_shaders->setEnabled(runtime_lock); 31 ui->use_asynchronous_shaders->setEnabled(runtime_lock);
31 ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); 32 ui->anisotropic_filtering_combobox->setEnabled(runtime_lock);
32 ui->enable_compute_pipelines_checkbox->setEnabled(runtime_lock); 33 ui->enable_compute_pipelines_checkbox->setEnabled(runtime_lock);
@@ -47,14 +48,20 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
47 static_cast<int>(Settings::values.gpu_accuracy.GetValue())); 48 static_cast<int>(Settings::values.gpu_accuracy.GetValue()));
48 ui->anisotropic_filtering_combobox->setCurrentIndex( 49 ui->anisotropic_filtering_combobox->setCurrentIndex(
49 Settings::values.max_anisotropy.GetValue()); 50 Settings::values.max_anisotropy.GetValue());
51 ui->astc_recompression_combobox->setCurrentIndex(
52 static_cast<int>(Settings::values.astc_recompression.GetValue()));
50 } else { 53 } else {
51 ConfigurationShared::SetPerGameSetting(ui->gpu_accuracy, &Settings::values.gpu_accuracy); 54 ConfigurationShared::SetPerGameSetting(ui->gpu_accuracy, &Settings::values.gpu_accuracy);
52 ConfigurationShared::SetPerGameSetting(ui->anisotropic_filtering_combobox, 55 ConfigurationShared::SetPerGameSetting(ui->anisotropic_filtering_combobox,
53 &Settings::values.max_anisotropy); 56 &Settings::values.max_anisotropy);
57 ConfigurationShared::SetPerGameSetting(ui->astc_recompression_combobox,
58 &Settings::values.astc_recompression);
54 ConfigurationShared::SetHighlight(ui->label_gpu_accuracy, 59 ConfigurationShared::SetHighlight(ui->label_gpu_accuracy,
55 !Settings::values.gpu_accuracy.UsingGlobal()); 60 !Settings::values.gpu_accuracy.UsingGlobal());
56 ConfigurationShared::SetHighlight(ui->af_label, 61 ConfigurationShared::SetHighlight(ui->af_label,
57 !Settings::values.max_anisotropy.UsingGlobal()); 62 !Settings::values.max_anisotropy.UsingGlobal());
63 ConfigurationShared::SetHighlight(ui->label_astc_recompression,
64 !Settings::values.astc_recompression.UsingGlobal());
58 } 65 }
59} 66}
60 67
@@ -71,6 +78,8 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
71 ui->use_reactive_flushing, use_reactive_flushing); 78 ui->use_reactive_flushing, use_reactive_flushing);
72 ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_astc, ui->async_astc, 79 ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_astc, ui->async_astc,
73 async_astc); 80 async_astc);
81 ConfigurationShared::ApplyPerGameSetting(&Settings::values.astc_recompression,
82 ui->astc_recompression_combobox);
74 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, 83 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders,
75 ui->use_asynchronous_shaders, 84 ui->use_asynchronous_shaders,
76 use_asynchronous_shaders); 85 use_asynchronous_shaders);
@@ -105,6 +114,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
105 Settings::values.renderer_force_max_clock.UsingGlobal()); 114 Settings::values.renderer_force_max_clock.UsingGlobal());
106 ui->use_reactive_flushing->setEnabled(Settings::values.use_reactive_flushing.UsingGlobal()); 115 ui->use_reactive_flushing->setEnabled(Settings::values.use_reactive_flushing.UsingGlobal());
107 ui->async_astc->setEnabled(Settings::values.async_astc.UsingGlobal()); 116 ui->async_astc->setEnabled(Settings::values.async_astc.UsingGlobal());
117 ui->astc_recompression_combobox->setEnabled(
118 Settings::values.astc_recompression.UsingGlobal());
108 ui->use_asynchronous_shaders->setEnabled( 119 ui->use_asynchronous_shaders->setEnabled(
109 Settings::values.use_asynchronous_shaders.UsingGlobal()); 120 Settings::values.use_asynchronous_shaders.UsingGlobal());
110 ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); 121 ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal());
@@ -144,6 +155,9 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
144 ConfigurationShared::SetColoredComboBox( 155 ConfigurationShared::SetColoredComboBox(
145 ui->anisotropic_filtering_combobox, ui->af_label, 156 ui->anisotropic_filtering_combobox, ui->af_label,
146 static_cast<int>(Settings::values.max_anisotropy.GetValue(true))); 157 static_cast<int>(Settings::values.max_anisotropy.GetValue(true)));
158 ConfigurationShared::SetColoredComboBox(
159 ui->astc_recompression_combobox, ui->label_astc_recompression,
160 static_cast<int>(Settings::values.astc_recompression.GetValue(true)));
147} 161}
148 162
149void ConfigureGraphicsAdvanced::ExposeComputeOption() { 163void ConfigureGraphicsAdvanced::ExposeComputeOption() {
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index 9ef7c8e8f..37757a918 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -70,6 +70,50 @@
70 </widget> 70 </widget>
71 </item> 71 </item>
72 <item> 72 <item>
73 <widget class="QWidget" name="astc_recompression_layout" native="true">
74 <layout class="QHBoxLayout" name="horizontalLayout_3">
75 <property name="leftMargin">
76 <number>0</number>
77 </property>
78 <property name="topMargin">
79 <number>0</number>
80 </property>
81 <property name="rightMargin">
82 <number>0</number>
83 </property>
84 <property name="bottomMargin">
85 <number>0</number>
86 </property>
87 <item>
88 <widget class="QLabel" name="label_astc_recompression">
89 <property name="text">
90 <string>ASTC recompression:</string>
91 </property>
92 </widget>
93 </item>
94 <item>
95 <widget class="QComboBox" name="astc_recompression_combobox">
96 <item>
97 <property name="text">
98 <string>Uncompressed (Best quality)</string>
99 </property>
100 </item>
101 <item>
102 <property name="text">
103 <string>BC1 (Low quality)</string>
104 </property>
105 </item>
106 <item>
107 <property name="text">
108 <string>BC3 (Medium quality)</string>
109 </property>
110 </item>
111 </widget>
112 </item>
113 </layout>
114 </widget>
115 </item>
116 <item>
73 <widget class="QCheckBox" name="async_present"> 117 <widget class="QCheckBox" name="async_present">
74 <property name="text"> 118 <property name="text">
75 <string>Enable asynchronous presentation (Vulkan only)</string> 119 <string>Enable asynchronous presentation (Vulkan only)</string>
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index dc9a3d68f..c5bc472ca 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -318,6 +318,7 @@ void Config::ReadValues() {
318 ReadSetting("Renderer", Settings::values.nvdec_emulation); 318 ReadSetting("Renderer", Settings::values.nvdec_emulation);
319 ReadSetting("Renderer", Settings::values.accelerate_astc); 319 ReadSetting("Renderer", Settings::values.accelerate_astc);
320 ReadSetting("Renderer", Settings::values.async_astc); 320 ReadSetting("Renderer", Settings::values.async_astc);
321 ReadSetting("Renderer", Settings::values.astc_recompression);
321 ReadSetting("Renderer", Settings::values.use_fast_gpu_time); 322 ReadSetting("Renderer", Settings::values.use_fast_gpu_time);
322 ReadSetting("Renderer", Settings::values.use_vulkan_driver_pipeline_cache); 323 ReadSetting("Renderer", Settings::values.use_vulkan_driver_pipeline_cache);
323 324
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 5e7c3ac04..644a30e59 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -360,6 +360,10 @@ accelerate_astc =
360# 0 (default): Off, 1: On 360# 0 (default): Off, 1: On
361async_astc = 361async_astc =
362 362
363# Recompress ASTC textures to a different format.
364# 0 (default): Uncompressed, 1: BC1 (Low quality), 2: BC3 (Medium quality)
365astc_recompression =
366
363# Turns on the speed limiter, which will limit the emulation speed to the desired speed limit value 367# Turns on the speed limiter, which will limit the emulation speed to the desired speed limit value
364# 0: Off, 1: On (default) 368# 0: Off, 1: On (default)
365use_speed_limit = 369use_speed_limit =
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index 5f39ece32..7b6d49c63 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -227,7 +227,7 @@ int main(int argc, char** argv) {
227 }; 227 };
228 228
229 while (optind < argc) { 229 while (optind < argc) {
230 int arg = getopt_long(argc, argv, "g:fhvp::c:", long_options, &option_index); 230 int arg = getopt_long(argc, argv, "g:fhvp::c:u:", long_options, &option_index);
231 if (arg != -1) { 231 if (arg != -1) {
232 switch (static_cast<char>(arg)) { 232 switch (static_cast<char>(arg)) {
233 case 'c': 233 case 'c':
@@ -283,7 +283,7 @@ int main(int argc, char** argv) {
283 break; 283 break;
284 case 'u': 284 case 'u':
285 selected_user = atoi(optarg); 285 selected_user = atoi(optarg);
286 return 0; 286 break;
287 case 'v': 287 case 'v':
288 PrintVersion(); 288 PrintVersion();
289 return 0; 289 return 0;