summaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2021-02-17 00:59:28 -0300
committerGravatar ameerj2021-07-22 21:51:22 -0400
commit85cce78583bc2232428a8fb39e43182877c8d5ad (patch)
tree308f4ef2d145652e08dff1da31c72c2f00dad2e1 /src/video_core
parentshader: Remove old shader management (diff)
downloadyuzu-85cce78583bc2232428a8fb39e43182877c8d5ad.tar.gz
yuzu-85cce78583bc2232428a8fb39e43182877c8d5ad.tar.xz
yuzu-85cce78583bc2232428a8fb39e43182877c8d5ad.zip
shader: Primitive Vulkan integration
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt6
-rw-r--r--src/video_core/engines/kepler_compute.h1
-rw-r--r--src/video_core/engines/shader_bytecode.h2298
-rw-r--r--src/video_core/engines/shader_header.h158
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.cpp140
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.h43
-rw-r--r--src/video_core/renderer_vulkan/vk_descriptor_pool.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_descriptor_pool.h10
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline.h36
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp190
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h30
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp23
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_pool.cpp12
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_pool.h12
15 files changed, 430 insertions, 2538 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index c5ce71706..3323e6916 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -43,9 +43,6 @@ add_library(video_core STATIC
43 engines/maxwell_3d.h 43 engines/maxwell_3d.h
44 engines/maxwell_dma.cpp 44 engines/maxwell_dma.cpp
45 engines/maxwell_dma.h 45 engines/maxwell_dma.h
46 engines/shader_bytecode.h
47 engines/shader_header.h
48 engines/shader_type.h
49 framebuffer_config.h 46 framebuffer_config.h
50 macro/macro.cpp 47 macro/macro.cpp
51 macro/macro.h 48 macro/macro.h
@@ -123,6 +120,7 @@ add_library(video_core STATIC
123 renderer_vulkan/vk_master_semaphore.h 120 renderer_vulkan/vk_master_semaphore.h
124 renderer_vulkan/vk_pipeline_cache.cpp 121 renderer_vulkan/vk_pipeline_cache.cpp
125 renderer_vulkan/vk_pipeline_cache.h 122 renderer_vulkan/vk_pipeline_cache.h
123 renderer_vulkan/vk_pipeline.h
126 renderer_vulkan/vk_query_cache.cpp 124 renderer_vulkan/vk_query_cache.cpp
127 renderer_vulkan/vk_query_cache.h 125 renderer_vulkan/vk_query_cache.h
128 renderer_vulkan/vk_rasterizer.cpp 126 renderer_vulkan/vk_rasterizer.cpp
@@ -201,7 +199,7 @@ add_library(video_core STATIC
201create_target_directory_groups(video_core) 199create_target_directory_groups(video_core)
202 200
203target_link_libraries(video_core PUBLIC common core) 201target_link_libraries(video_core PUBLIC common core)
204target_link_libraries(video_core PRIVATE glad xbyak) 202target_link_libraries(video_core PRIVATE glad shader_recompiler xbyak)
205 203
206if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) 204if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32)
207 add_dependencies(video_core ffmpeg-build) 205 add_dependencies(video_core ffmpeg-build)
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 0d7683c2d..f8b8d06ac 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -12,7 +12,6 @@
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/engines/engine_interface.h" 13#include "video_core/engines/engine_interface.h"
14#include "video_core/engines/engine_upload.h" 14#include "video_core/engines/engine_upload.h"
15#include "video_core/engines/shader_type.h"
16#include "video_core/gpu.h" 15#include "video_core/gpu.h"
17#include "video_core/textures/texture.h" 16#include "video_core/textures/texture.h"
18 17
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
deleted file mode 100644
index 8b45f1b62..000000000
--- a/src/video_core/engines/shader_bytecode.h
+++ /dev/null
@@ -1,2298 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <bitset>
9#include <optional>
10#include <tuple>
11#include <vector>
12
13#include "common/assert.h"
14#include "common/bit_field.h"
15#include "common/common_types.h"
16
17namespace Tegra::Shader {
18
19struct Register {
20 /// Number of registers
21 static constexpr std::size_t NumRegisters = 256;
22
23 /// Register 255 is special cased to always be 0
24 static constexpr std::size_t ZeroIndex = 255;
25
26 enum class Size : u64 {
27 Byte = 0,
28 Short = 1,
29 Word = 2,
30 Long = 3,
31 };
32
33 constexpr Register() = default;
34
35 constexpr Register(u64 value_) : value(value_) {}
36
37 [[nodiscard]] constexpr operator u64() const {
38 return value;
39 }
40
41 template <typename T>
42 [[nodiscard]] constexpr u64 operator-(const T& oth) const {
43 return value - oth;
44 }
45
46 template <typename T>
47 [[nodiscard]] constexpr u64 operator&(const T& oth) const {
48 return value & oth;
49 }
50
51 [[nodiscard]] constexpr u64 operator&(const Register& oth) const {
52 return value & oth.value;
53 }
54
55 [[nodiscard]] constexpr u64 operator~() const {
56 return ~value;
57 }
58
59 [[nodiscard]] u64 GetSwizzledIndex(u64 elem) const {
60 elem = (value + elem) & 3;
61 return (value & ~3) + elem;
62 }
63
64private:
65 u64 value{};
66};
67
68enum class AttributeSize : u64 {
69 Word = 0,
70 DoubleWord = 1,
71 TripleWord = 2,
72 QuadWord = 3,
73};
74
75union Attribute {
76 Attribute() = default;
77
78 constexpr explicit Attribute(u64 value_) : value(value_) {}
79
80 enum class Index : u64 {
81 LayerViewportPointSize = 6,
82 Position = 7,
83 Attribute_0 = 8,
84 Attribute_31 = 39,
85 FrontColor = 40,
86 FrontSecondaryColor = 41,
87 BackColor = 42,
88 BackSecondaryColor = 43,
89 ClipDistances0123 = 44,
90 ClipDistances4567 = 45,
91 PointCoord = 46,
92 // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex
93 // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval
94 // shader.
95 TessCoordInstanceIDVertexID = 47,
96 TexCoord_0 = 48,
97 TexCoord_7 = 55,
98 // This attribute contains a tuple of (Unk, Unk, Unk, gl_FrontFacing) when inside a fragment
99 // shader. It is unknown what the other values contain.
100 FrontFacing = 63,
101 };
102
103 union {
104 BitField<20, 10, u64> immediate;
105 BitField<22, 2, u64> element;
106 BitField<24, 6, Index> index;
107 BitField<31, 1, u64> patch;
108 BitField<47, 3, AttributeSize> size;
109
110 [[nodiscard]] bool IsPhysical() const {
111 return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0;
112 }
113 } fmt20;
114
115 union {
116 BitField<30, 2, u64> element;
117 BitField<32, 6, Index> index;
118 } fmt28;
119
120 BitField<39, 8, u64> reg;
121 u64 value{};
122};
123
124union Sampler {
125 Sampler() = default;
126
127 constexpr explicit Sampler(u64 value_) : value(value_) {}
128
129 enum class Index : u64 {
130 Sampler_0 = 8,
131 };
132
133 BitField<36, 13, Index> index;
134 u64 value{};
135};
136
137union Image {
138 Image() = default;
139
140 constexpr explicit Image(u64 value_) : value{value_} {}
141
142 BitField<36, 13, u64> index;
143 u64 value;
144};
145
146} // namespace Tegra::Shader
147
148namespace std {
149
150// TODO(bunnei): The below is forbidden by the C++ standard, but works fine. See #330.
151template <>
152struct make_unsigned<Tegra::Shader::Attribute> {
153 using type = Tegra::Shader::Attribute;
154};
155
156template <>
157struct make_unsigned<Tegra::Shader::Register> {
158 using type = Tegra::Shader::Register;
159};
160
161} // namespace std
162
163namespace Tegra::Shader {
164
165enum class Pred : u64 {
166 UnusedIndex = 0x7,
167 NeverExecute = 0xF,
168};
169
170enum class PredCondition : u64 {
171 F = 0, // Always false
172 LT = 1, // Ordered less than
173 EQ = 2, // Ordered equal
174 LE = 3, // Ordered less than or equal
175 GT = 4, // Ordered greater than
176 NE = 5, // Ordered not equal
177 GE = 6, // Ordered greater than or equal
178 NUM = 7, // Ordered
179 NAN_ = 8, // Unordered
180 LTU = 9, // Unordered less than
181 EQU = 10, // Unordered equal
182 LEU = 11, // Unordered less than or equal
183 GTU = 12, // Unordered greater than
184 NEU = 13, // Unordered not equal
185 GEU = 14, // Unordered greater than or equal
186 T = 15, // Always true
187};
188
189enum class PredOperation : u64 {
190 And = 0,
191 Or = 1,
192 Xor = 2,
193};
194
195enum class LogicOperation : u64 {
196 And = 0,
197 Or = 1,
198 Xor = 2,
199 PassB = 3,
200};
201
202enum class SubOp : u64 {
203 Cos = 0x0,
204 Sin = 0x1,
205 Ex2 = 0x2,
206 Lg2 = 0x3,
207 Rcp = 0x4,
208 Rsq = 0x5,
209 Sqrt = 0x8,
210};
211
212enum class F2iRoundingOp : u64 {
213 RoundEven = 0,
214 Floor = 1,
215 Ceil = 2,
216 Trunc = 3,
217};
218
219enum class F2fRoundingOp : u64 {
220 None = 0,
221 Pass = 3,
222 Round = 8,
223 Floor = 9,
224 Ceil = 10,
225 Trunc = 11,
226};
227
228enum class AtomicOp : u64 {
229 Add = 0,
230 Min = 1,
231 Max = 2,
232 Inc = 3,
233 Dec = 4,
234 And = 5,
235 Or = 6,
236 Xor = 7,
237 Exch = 8,
238 SafeAdd = 10,
239};
240
241enum class GlobalAtomicType : u64 {
242 U32 = 0,
243 S32 = 1,
244 U64 = 2,
245 F32_FTZ_RN = 3,
246 F16x2_FTZ_RN = 4,
247 S64 = 5,
248};
249
250enum class UniformType : u64 {
251 UnsignedByte = 0,
252 SignedByte = 1,
253 UnsignedShort = 2,
254 SignedShort = 3,
255 Single = 4,
256 Double = 5,
257 Quad = 6,
258 UnsignedQuad = 7,
259};
260
261enum class StoreType : u64 {
262 Unsigned8 = 0,
263 Signed8 = 1,
264 Unsigned16 = 2,
265 Signed16 = 3,
266 Bits32 = 4,
267 Bits64 = 5,
268 Bits128 = 6,
269};
270
271enum class AtomicType : u64 {
272 U32 = 0,
273 S32 = 1,
274 U64 = 2,
275 S64 = 3,
276};
277
278enum class IMinMaxExchange : u64 {
279 None = 0,
280 XLo = 1,
281 XMed = 2,
282 XHi = 3,
283};
284
285enum class VideoType : u64 {
286 Size16_Low = 0,
287 Size16_High = 1,
288 Size32 = 2,
289 Invalid = 3,
290};
291
292enum class VmadShr : u64 {
293 Shr7 = 1,
294 Shr15 = 2,
295};
296
297enum class VmnmxType : u64 {
298 Bits8,
299 Bits16,
300 Bits32,
301};
302
303enum class VmnmxOperation : u64 {
304 Mrg_16H = 0,
305 Mrg_16L = 1,
306 Mrg_8B0 = 2,
307 Mrg_8B2 = 3,
308 Acc = 4,
309 Min = 5,
310 Max = 6,
311 Nop = 7,
312};
313
314enum class XmadMode : u64 {
315 None = 0,
316 CLo = 1,
317 CHi = 2,
318 CSfu = 3,
319 CBcc = 4,
320};
321
322enum class IAdd3Mode : u64 {
323 None = 0,
324 RightShift = 1,
325 LeftShift = 2,
326};
327
328enum class IAdd3Height : u64 {
329 None = 0,
330 LowerHalfWord = 1,
331 UpperHalfWord = 2,
332};
333
334enum class FlowCondition : u64 {
335 Always = 0xF,
336 Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
337};
338
339enum class ConditionCode : u64 {
340 F = 0,
341 LT = 1,
342 EQ = 2,
343 LE = 3,
344 GT = 4,
345 NE = 5,
346 GE = 6,
347 Num = 7,
348 Nan = 8,
349 LTU = 9,
350 EQU = 10,
351 LEU = 11,
352 GTU = 12,
353 NEU = 13,
354 GEU = 14,
355 T = 15,
356 OFF = 16,
357 LO = 17,
358 SFF = 18,
359 LS = 19,
360 HI = 20,
361 SFT = 21,
362 HS = 22,
363 OFT = 23,
364 CSM_TA = 24,
365 CSM_TR = 25,
366 CSM_MX = 26,
367 FCSM_TA = 27,
368 FCSM_TR = 28,
369 FCSM_MX = 29,
370 RLE = 30,
371 RGT = 31,
372};
373
374enum class PredicateResultMode : u64 {
375 None = 0x0,
376 NotZero = 0x3,
377};
378
379enum class TextureType : u64 {
380 Texture1D = 0,
381 Texture2D = 1,
382 Texture3D = 2,
383 TextureCube = 3,
384};
385
386enum class TextureQueryType : u64 {
387 Dimension = 1,
388 TextureType = 2,
389 SamplePosition = 5,
390 Filter = 16,
391 LevelOfDetail = 18,
392 Wrap = 20,
393 BorderColor = 22,
394};
395
396enum class TextureProcessMode : u64 {
397 None = 0,
398 LZ = 1, // Load LOD of zero.
399 LB = 2, // Load Bias.
400 LL = 3, // Load LOD.
401 LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB.
402 LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL.
403};
404
405enum class TextureMiscMode : u64 {
406 DC,
407 AOFFI, // Uses Offset
408 NDV,
409 NODEP,
410 MZ,
411 PTP,
412};
413
414enum class SurfaceDataMode : u64 {
415 P = 0,
416 D_BA = 1,
417};
418
419enum class OutOfBoundsStore : u64 {
420 Ignore = 0,
421 Clamp = 1,
422 Trap = 2,
423};
424
425enum class ImageType : u64 {
426 Texture1D = 0,
427 TextureBuffer = 1,
428 Texture1DArray = 2,
429 Texture2D = 3,
430 Texture2DArray = 4,
431 Texture3D = 5,
432};
433
434enum class IsberdMode : u64 {
435 None = 0,
436 Patch = 1,
437 Prim = 2,
438 Attr = 3,
439};
440
441enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 };
442
443enum class MembarType : u64 {
444 CTA = 0,
445 GL = 1,
446 SYS = 2,
447 VC = 3,
448};
449
450enum class MembarUnknown : u64 { Default = 0, IVALLD = 1, IVALLT = 2, IVALLTD = 3 };
451
452enum class HalfType : u64 {
453 H0_H1 = 0,
454 F32 = 1,
455 H0_H0 = 2,
456 H1_H1 = 3,
457};
458
459enum class HalfMerge : u64 {
460 H0_H1 = 0,
461 F32 = 1,
462 Mrg_H0 = 2,
463 Mrg_H1 = 3,
464};
465
466enum class HalfPrecision : u64 {
467 None = 0,
468 FTZ = 1,
469 FMZ = 2,
470};
471
472enum class R2pMode : u64 {
473 Pr = 0,
474 Cc = 1,
475};
476
477enum class IpaInterpMode : u64 {
478 Pass = 0,
479 Multiply = 1,
480 Constant = 2,
481 Sc = 3,
482};
483
484enum class IpaSampleMode : u64 {
485 Default = 0,
486 Centroid = 1,
487 Offset = 2,
488};
489
490enum class LmemLoadCacheManagement : u64 {
491 Default = 0,
492 LU = 1,
493 CI = 2,
494 CV = 3,
495};
496
497enum class StoreCacheManagement : u64 {
498 Default = 0,
499 CG = 1,
500 CS = 2,
501 WT = 3,
502};
503
504struct IpaMode {
505 IpaInterpMode interpolation_mode;
506 IpaSampleMode sampling_mode;
507
508 [[nodiscard]] bool operator==(const IpaMode& a) const {
509 return std::tie(interpolation_mode, sampling_mode) ==
510 std::tie(a.interpolation_mode, a.sampling_mode);
511 }
512 [[nodiscard]] bool operator!=(const IpaMode& a) const {
513 return !operator==(a);
514 }
515 [[nodiscard]] bool operator<(const IpaMode& a) const {
516 return std::tie(interpolation_mode, sampling_mode) <
517 std::tie(a.interpolation_mode, a.sampling_mode);
518 }
519};
520
521enum class SystemVariable : u64 {
522 LaneId = 0x00,
523 VirtCfg = 0x02,
524 VirtId = 0x03,
525 Pm0 = 0x04,
526 Pm1 = 0x05,
527 Pm2 = 0x06,
528 Pm3 = 0x07,
529 Pm4 = 0x08,
530 Pm5 = 0x09,
531 Pm6 = 0x0a,
532 Pm7 = 0x0b,
533 OrderingTicket = 0x0f,
534 PrimType = 0x10,
535 InvocationId = 0x11,
536 Ydirection = 0x12,
537 ThreadKill = 0x13,
538 ShaderType = 0x14,
539 DirectBeWriteAddressLow = 0x15,
540 DirectBeWriteAddressHigh = 0x16,
541 DirectBeWriteEnabled = 0x17,
542 MachineId0 = 0x18,
543 MachineId1 = 0x19,
544 MachineId2 = 0x1a,
545 MachineId3 = 0x1b,
546 Affinity = 0x1c,
547 InvocationInfo = 0x1d,
548 WscaleFactorXY = 0x1e,
549 WscaleFactorZ = 0x1f,
550 Tid = 0x20,
551 TidX = 0x21,
552 TidY = 0x22,
553 TidZ = 0x23,
554 CtaParam = 0x24,
555 CtaIdX = 0x25,
556 CtaIdY = 0x26,
557 CtaIdZ = 0x27,
558 NtId = 0x28,
559 CirQueueIncrMinusOne = 0x29,
560 Nlatc = 0x2a,
561 SmSpaVersion = 0x2c,
562 MultiPassShaderInfo = 0x2d,
563 LwinHi = 0x2e,
564 SwinHi = 0x2f,
565 SwinLo = 0x30,
566 SwinSz = 0x31,
567 SmemSz = 0x32,
568 SmemBanks = 0x33,
569 LwinLo = 0x34,
570 LwinSz = 0x35,
571 LmemLosz = 0x36,
572 LmemHioff = 0x37,
573 EqMask = 0x38,
574 LtMask = 0x39,
575 LeMask = 0x3a,
576 GtMask = 0x3b,
577 GeMask = 0x3c,
578 RegAlloc = 0x3d,
579 CtxAddr = 0x3e, // .fmask = F_SM50
580 BarrierAlloc = 0x3e, // .fmask = F_SM60
581 GlobalErrorStatus = 0x40,
582 WarpErrorStatus = 0x42,
583 WarpErrorStatusClear = 0x43,
584 PmHi0 = 0x48,
585 PmHi1 = 0x49,
586 PmHi2 = 0x4a,
587 PmHi3 = 0x4b,
588 PmHi4 = 0x4c,
589 PmHi5 = 0x4d,
590 PmHi6 = 0x4e,
591 PmHi7 = 0x4f,
592 ClockLo = 0x50,
593 ClockHi = 0x51,
594 GlobalTimerLo = 0x52,
595 GlobalTimerHi = 0x53,
596 HwTaskId = 0x60,
597 CircularQueueEntryIndex = 0x61,
598 CircularQueueEntryAddressLow = 0x62,
599 CircularQueueEntryAddressHigh = 0x63,
600};
601
602enum class PhysicalAttributeDirection : u64 {
603 Input = 0,
604 Output = 1,
605};
606
607enum class VoteOperation : u64 {
608 All = 0, // allThreadsNV
609 Any = 1, // anyThreadNV
610 Eq = 2, // allThreadsEqualNV
611};
612
613enum class ImageAtomicOperationType : u64 {
614 U32 = 0,
615 S32 = 1,
616 U64 = 2,
617 F32 = 3,
618 S64 = 5,
619 SD32 = 6,
620 SD64 = 7,
621};
622
623enum class ImageAtomicOperation : u64 {
624 Add = 0,
625 Min = 1,
626 Max = 2,
627 Inc = 3,
628 Dec = 4,
629 And = 5,
630 Or = 6,
631 Xor = 7,
632 Exch = 8,
633};
634
635enum class ShuffleOperation : u64 {
636 Idx = 0, // shuffleNV
637 Up = 1, // shuffleUpNV
638 Down = 2, // shuffleDownNV
639 Bfly = 3, // shuffleXorNV
640};
641
642enum class ShfType : u64 {
643 Bits32 = 0,
644 U64 = 2,
645 S64 = 3,
646};
647
648enum class ShfXmode : u64 {
649 None = 0,
650 HI = 1,
651 X = 2,
652 XHI = 3,
653};
654
655union Instruction {
656 constexpr Instruction& operator=(const Instruction& instr) {
657 value = instr.value;
658 return *this;
659 }
660
661 constexpr Instruction(u64 value_) : value{value_} {}
662 constexpr Instruction(const Instruction& instr) : value(instr.value) {}
663
664 [[nodiscard]] constexpr bool Bit(u64 offset) const {
665 return ((value >> offset) & 1) != 0;
666 }
667
668 BitField<0, 8, Register> gpr0;
669 BitField<8, 8, Register> gpr8;
670 union {
671 BitField<16, 4, Pred> full_pred;
672 BitField<16, 3, u64> pred_index;
673 } pred;
674 BitField<19, 1, u64> negate_pred;
675 BitField<20, 8, Register> gpr20;
676 BitField<20, 4, SubOp> sub_op;
677 BitField<28, 8, Register> gpr28;
678 BitField<39, 8, Register> gpr39;
679 BitField<48, 16, u64> opcode;
680
681 union {
682 BitField<8, 5, ConditionCode> cc;
683 BitField<13, 1, u64> trigger;
684 } nop;
685
686 union {
687 BitField<48, 2, VoteOperation> operation;
688 BitField<45, 3, u64> dest_pred;
689 BitField<39, 3, u64> value;
690 BitField<42, 1, u64> negate_value;
691 } vote;
692
693 union {
694 BitField<30, 2, ShuffleOperation> operation;
695 BitField<48, 3, u64> pred48;
696 BitField<28, 1, u64> is_index_imm;
697 BitField<29, 1, u64> is_mask_imm;
698 BitField<20, 5, u64> index_imm;
699 BitField<34, 13, u64> mask_imm;
700 } shfl;
701
702 union {
703 BitField<44, 1, u64> ftz;
704 BitField<39, 2, u64> tab5cb8_2;
705 BitField<38, 1, u64> ndv;
706 BitField<47, 1, u64> cc;
707 BitField<28, 8, u64> swizzle;
708 } fswzadd;
709
710 union {
711 BitField<8, 8, Register> gpr;
712 BitField<20, 24, s64> offset;
713 } gmem;
714
715 union {
716 BitField<20, 16, u64> imm20_16;
717 BitField<20, 19, u64> imm20_19;
718 BitField<20, 32, s64> imm20_32;
719 BitField<45, 1, u64> negate_b;
720 BitField<46, 1, u64> abs_a;
721 BitField<48, 1, u64> negate_a;
722 BitField<49, 1, u64> abs_b;
723 BitField<50, 1, u64> saturate_d;
724 BitField<56, 1, u64> negate_imm;
725
726 union {
727 BitField<39, 3, u64> pred;
728 BitField<42, 1, u64> negate_pred;
729 } fmnmx;
730
731 union {
732 BitField<39, 1, u64> invert_a;
733 BitField<40, 1, u64> invert_b;
734 BitField<41, 2, LogicOperation> operation;
735 BitField<44, 2, PredicateResultMode> pred_result_mode;
736 BitField<48, 3, Pred> pred48;
737 } lop;
738
739 union {
740 BitField<53, 2, LogicOperation> operation;
741 BitField<55, 1, u64> invert_a;
742 BitField<56, 1, u64> invert_b;
743 } lop32i;
744
745 union {
746 BitField<28, 8, u64> imm_lut28;
747 BitField<48, 8, u64> imm_lut48;
748
749 [[nodiscard]] u32 GetImmLut28() const {
750 return static_cast<u32>(imm_lut28);
751 }
752
753 [[nodiscard]] u32 GetImmLut48() const {
754 return static_cast<u32>(imm_lut48);
755 }
756 } lop3;
757
758 [[nodiscard]] u16 GetImm20_16() const {
759 return static_cast<u16>(imm20_16);
760 }
761
762 [[nodiscard]] u32 GetImm20_19() const {
763 u32 imm{static_cast<u32>(imm20_19)};
764 imm <<= 12;
765 imm |= negate_imm ? 0x80000000 : 0;
766 return imm;
767 }
768
769 [[nodiscard]] u32 GetImm20_32() const {
770 return static_cast<u32>(imm20_32);
771 }
772
773 [[nodiscard]] s32 GetSignedImm20_20() const {
774 const auto immediate = static_cast<u32>(imm20_19 | (negate_imm << 19));
775 // Sign extend the 20-bit value.
776 const auto mask = 1U << (20 - 1);
777 return static_cast<s32>((immediate ^ mask) - mask);
778 }
779 } alu;
780
781 union {
782 BitField<38, 1, u64> idx;
783 BitField<51, 1, u64> saturate;
784 BitField<52, 2, IpaSampleMode> sample_mode;
785 BitField<54, 2, IpaInterpMode> interp_mode;
786 } ipa;
787
788 union {
789 BitField<39, 2, u64> tab5cb8_2;
790 BitField<41, 3, u64> postfactor;
791 BitField<44, 2, u64> tab5c68_0;
792 BitField<48, 1, u64> negate_b;
793 } fmul;
794
795 union {
796 BitField<55, 1, u64> saturate;
797 } fmul32;
798
799 union {
800 BitField<52, 1, u64> generates_cc;
801 } op_32;
802
803 union {
804 BitField<48, 1, u64> is_signed;
805 } shift;
806
807 union {
808 BitField<39, 1, u64> wrap;
809 } shr;
810
811 union {
812 BitField<37, 2, ShfType> type;
813 BitField<48, 2, ShfXmode> xmode;
814 BitField<50, 1, u64> wrap;
815 BitField<20, 6, u64> immediate;
816 } shf;
817
818 union {
819 BitField<39, 5, u64> shift_amount;
820 BitField<48, 1, u64> negate_b;
821 BitField<49, 1, u64> negate_a;
822 } alu_integer;
823
824 union {
825 BitField<43, 1, u64> x;
826 } iadd;
827
828 union {
829 BitField<39, 1, u64> ftz;
830 BitField<32, 1, u64> saturate;
831 BitField<49, 2, HalfMerge> merge;
832
833 BitField<44, 1, u64> abs_a;
834 BitField<47, 2, HalfType> type_a;
835
836 BitField<30, 1, u64> abs_b;
837 BitField<28, 2, HalfType> type_b;
838
839 BitField<35, 2, HalfType> type_c;
840 } alu_half;
841
842 union {
843 BitField<39, 2, HalfPrecision> precision;
844 BitField<39, 1, u64> ftz;
845 BitField<52, 1, u64> saturate;
846 BitField<49, 2, HalfMerge> merge;
847
848 BitField<43, 1, u64> negate_a;
849 BitField<44, 1, u64> abs_a;
850 BitField<47, 2, HalfType> type_a;
851 } alu_half_imm;
852
853 union {
854 BitField<29, 1, u64> first_negate;
855 BitField<20, 9, u64> first;
856
857 BitField<56, 1, u64> second_negate;
858 BitField<30, 9, u64> second;
859
860 [[nodiscard]] u32 PackImmediates() const {
861 // Immediates are half floats shifted.
862 constexpr u32 imm_shift = 6;
863 return static_cast<u32>((first << imm_shift) | (second << (16 + imm_shift)));
864 }
865 } half_imm;
866
867 union {
868 union {
869 BitField<37, 2, HalfPrecision> precision;
870 BitField<32, 1, u64> saturate;
871
872 BitField<31, 1, u64> negate_b;
873 BitField<30, 1, u64> negate_c;
874 BitField<35, 2, HalfType> type_c;
875 } rr;
876
877 BitField<57, 2, HalfPrecision> precision;
878 BitField<52, 1, u64> saturate;
879
880 BitField<49, 2, HalfMerge> merge;
881
882 BitField<47, 2, HalfType> type_a;
883
884 BitField<56, 1, u64> negate_b;
885 BitField<28, 2, HalfType> type_b;
886
887 BitField<51, 1, u64> negate_c;
888 BitField<53, 2, HalfType> type_reg39;
889 } hfma2;
890
891 union {
892 BitField<40, 1, u64> invert;
893 } popc;
894
895 union {
896 BitField<41, 1, u64> sh;
897 BitField<40, 1, u64> invert;
898 BitField<48, 1, u64> is_signed;
899 } flo;
900
901 union {
902 BitField<39, 3, u64> pred;
903 BitField<42, 1, u64> neg_pred;
904 } sel;
905
906 union {
907 BitField<39, 3, u64> pred;
908 BitField<42, 1, u64> negate_pred;
909 BitField<43, 2, IMinMaxExchange> exchange;
910 BitField<48, 1, u64> is_signed;
911 } imnmx;
912
913 union {
914 BitField<31, 2, IAdd3Height> height_c;
915 BitField<33, 2, IAdd3Height> height_b;
916 BitField<35, 2, IAdd3Height> height_a;
917 BitField<37, 2, IAdd3Mode> mode;
918 BitField<49, 1, u64> neg_c;
919 BitField<50, 1, u64> neg_b;
920 BitField<51, 1, u64> neg_a;
921 } iadd3;
922
923 union {
924 BitField<54, 1, u64> saturate;
925 BitField<56, 1, u64> negate_a;
926 } iadd32i;
927
928 union {
929 BitField<53, 1, u64> negate_b;
930 BitField<54, 1, u64> abs_a;
931 BitField<56, 1, u64> negate_a;
932 BitField<57, 1, u64> abs_b;
933 } fadd32i;
934
935 union {
936 BitField<40, 1, u64> brev;
937 BitField<47, 1, u64> rd_cc;
938 BitField<48, 1, u64> is_signed;
939 } bfe;
940
941 union {
942 BitField<48, 3, u64> pred48;
943
944 union {
945 BitField<20, 20, u64> entry_a;
946 BitField<39, 5, u64> entry_b;
947 BitField<45, 1, u64> neg;
948 BitField<46, 1, u64> uses_cc;
949 } imm;
950
951 union {
952 BitField<20, 14, u64> cb_index;
953 BitField<34, 5, u64> cb_offset;
954 BitField<56, 1, u64> neg;
955 BitField<57, 1, u64> uses_cc;
956 } hi;
957
958 union {
959 BitField<20, 14, u64> cb_index;
960 BitField<34, 5, u64> cb_offset;
961 BitField<39, 5, u64> entry_a;
962 BitField<45, 1, u64> neg;
963 BitField<46, 1, u64> uses_cc;
964 } rz;
965
966 union {
967 BitField<39, 5, u64> entry_a;
968 BitField<45, 1, u64> neg;
969 BitField<46, 1, u64> uses_cc;
970 } r1;
971
972 union {
973 BitField<28, 8, u64> entry_a;
974 BitField<37, 1, u64> neg;
975 BitField<38, 1, u64> uses_cc;
976 } r2;
977
978 } lea;
979
980 union {
981 BitField<0, 5, FlowCondition> cond;
982 } flow;
983
984 union {
985 BitField<47, 1, u64> cc;
986 BitField<48, 1, u64> negate_b;
987 BitField<49, 1, u64> negate_c;
988 BitField<51, 2, u64> tab5980_1;
989 BitField<53, 2, u64> tab5980_0;
990 } ffma;
991
992 union {
993 BitField<48, 3, UniformType> type;
994 BitField<44, 2, u64> unknown;
995 } ld_c;
996
997 union {
998 BitField<48, 3, StoreType> type;
999 } ldst_sl;
1000
1001 union {
1002 BitField<44, 2, u64> unknown;
1003 } ld_l;
1004
1005 union {
1006 BitField<44, 2, StoreCacheManagement> cache_management;
1007 } st_l;
1008
1009 union {
1010 BitField<48, 3, UniformType> type;
1011 BitField<46, 2, u64> cache_mode;
1012 } ldg;
1013
1014 union {
1015 BitField<48, 3, UniformType> type;
1016 BitField<46, 2, u64> cache_mode;
1017 } stg;
1018
1019 union {
1020 BitField<23, 3, AtomicOp> operation;
1021 BitField<48, 1, u64> extended;
1022 BitField<20, 3, GlobalAtomicType> type;
1023 } red;
1024
1025 union {
1026 BitField<52, 4, AtomicOp> operation;
1027 BitField<49, 3, GlobalAtomicType> type;
1028 BitField<28, 20, s64> offset;
1029 } atom;
1030
1031 union {
1032 BitField<52, 4, AtomicOp> operation;
1033 BitField<28, 2, AtomicType> type;
1034 BitField<30, 22, s64> offset;
1035
1036 [[nodiscard]] s32 GetImmediateOffset() const {
1037 return static_cast<s32>(offset << 2);
1038 }
1039 } atoms;
1040
1041 union {
1042 BitField<32, 1, PhysicalAttributeDirection> direction;
1043 BitField<47, 3, AttributeSize> size;
1044 BitField<20, 11, u64> address;
1045 } al2p;
1046
1047 union {
1048 BitField<53, 3, UniformType> type;
1049 BitField<52, 1, u64> extended;
1050 } generic;
1051
1052 union {
1053 BitField<0, 3, u64> pred0;
1054 BitField<3, 3, u64> pred3;
1055 BitField<6, 1, u64> neg_b;
1056 BitField<7, 1, u64> abs_a;
1057 BitField<39, 3, u64> pred39;
1058 BitField<42, 1, u64> neg_pred;
1059 BitField<43, 1, u64> neg_a;
1060 BitField<44, 1, u64> abs_b;
1061 BitField<45, 2, PredOperation> op;
1062 BitField<47, 1, u64> ftz;
1063 BitField<48, 4, PredCondition> cond;
1064 } fsetp;
1065
1066 union {
1067 BitField<0, 3, u64> pred0;
1068 BitField<3, 3, u64> pred3;
1069 BitField<39, 3, u64> pred39;
1070 BitField<42, 1, u64> neg_pred;
1071 BitField<45, 2, PredOperation> op;
1072 BitField<48, 1, u64> is_signed;
1073 BitField<49, 3, PredCondition> cond;
1074 } isetp;
1075
1076 union {
1077 BitField<48, 1, u64> is_signed;
1078 BitField<49, 3, PredCondition> cond;
1079 } icmp;
1080
1081 union {
1082 BitField<0, 3, u64> pred0;
1083 BitField<3, 3, u64> pred3;
1084 BitField<12, 3, u64> pred12;
1085 BitField<15, 1, u64> neg_pred12;
1086 BitField<24, 2, PredOperation> cond;
1087 BitField<29, 3, u64> pred29;
1088 BitField<32, 1, u64> neg_pred29;
1089 BitField<39, 3, u64> pred39;
1090 BitField<42, 1, u64> neg_pred39;
1091 BitField<45, 2, PredOperation> op;
1092 } psetp;
1093
1094 union {
1095 BitField<43, 4, PredCondition> cond;
1096 BitField<45, 2, PredOperation> op;
1097 BitField<3, 3, u64> pred3;
1098 BitField<0, 3, u64> pred0;
1099 BitField<39, 3, u64> pred39;
1100 } vsetp;
1101
1102 union {
1103 BitField<12, 3, u64> pred12;
1104 BitField<15, 1, u64> neg_pred12;
1105 BitField<24, 2, PredOperation> cond;
1106 BitField<29, 3, u64> pred29;
1107 BitField<32, 1, u64> neg_pred29;
1108 BitField<39, 3, u64> pred39;
1109 BitField<42, 1, u64> neg_pred39;
1110 BitField<44, 1, u64> bf;
1111 BitField<45, 2, PredOperation> op;
1112 } pset;
1113
1114 union {
1115 BitField<0, 3, u64> pred0;
1116 BitField<3, 3, u64> pred3;
1117 BitField<8, 5, ConditionCode> cc; // flag in cc
1118 BitField<39, 3, u64> pred39;
1119 BitField<42, 1, u64> neg_pred39;
1120 BitField<45, 4, PredOperation> op; // op with pred39
1121 } csetp;
1122
1123 union {
1124 BitField<6, 1, u64> ftz;
1125 BitField<45, 2, PredOperation> op;
1126 BitField<3, 3, u64> pred3;
1127 BitField<0, 3, u64> pred0;
1128 BitField<43, 1, u64> negate_a;
1129 BitField<44, 1, u64> abs_a;
1130 BitField<47, 2, HalfType> type_a;
1131 union {
1132 BitField<35, 4, PredCondition> cond;
1133 BitField<49, 1, u64> h_and;
1134 BitField<31, 1, u64> negate_b;
1135 BitField<30, 1, u64> abs_b;
1136 BitField<28, 2, HalfType> type_b;
1137 } reg;
1138 union {
1139 BitField<56, 1, u64> negate_b;
1140 BitField<54, 1, u64> abs_b;
1141 } cbuf;
1142 union {
1143 BitField<49, 4, PredCondition> cond;
1144 BitField<53, 1, u64> h_and;
1145 } cbuf_and_imm;
1146 BitField<42, 1, u64> neg_pred;
1147 BitField<39, 3, u64> pred39;
1148 } hsetp2;
1149
1150 union {
1151 BitField<40, 1, R2pMode> mode;
1152 BitField<41, 2, u64> byte;
1153 BitField<20, 7, u64> immediate_mask;
1154 } p2r_r2p;
1155
1156 union {
1157 BitField<39, 3, u64> pred39;
1158 BitField<42, 1, u64> neg_pred;
1159 BitField<43, 1, u64> neg_a;
1160 BitField<44, 1, u64> abs_b;
1161 BitField<45, 2, PredOperation> op;
1162 BitField<48, 4, PredCondition> cond;
1163 BitField<52, 1, u64> bf;
1164 BitField<53, 1, u64> neg_b;
1165 BitField<54, 1, u64> abs_a;
1166 BitField<55, 1, u64> ftz;
1167 } fset;
1168
1169 union {
1170 BitField<47, 1, u64> ftz;
1171 BitField<48, 4, PredCondition> cond;
1172 } fcmp;
1173
1174 union {
1175 BitField<49, 1, u64> bf;
1176 BitField<35, 3, PredCondition> cond;
1177 BitField<50, 1, u64> ftz;
1178 BitField<45, 2, PredOperation> op;
1179 BitField<43, 1, u64> negate_a;
1180 BitField<44, 1, u64> abs_a;
1181 BitField<47, 2, HalfType> type_a;
1182 BitField<31, 1, u64> negate_b;
1183 BitField<30, 1, u64> abs_b;
1184 BitField<28, 2, HalfType> type_b;
1185 BitField<42, 1, u64> neg_pred;
1186 BitField<39, 3, u64> pred39;
1187 } hset2;
1188
1189 union {
1190 BitField<39, 3, u64> pred39;
1191 BitField<42, 1, u64> neg_pred;
1192 BitField<44, 1, u64> bf;
1193 BitField<45, 2, PredOperation> op;
1194 BitField<48, 1, u64> is_signed;
1195 BitField<49, 3, PredCondition> cond;
1196 } iset;
1197
1198 union {
1199 BitField<45, 1, u64> negate_a;
1200 BitField<49, 1, u64> abs_a;
1201 BitField<10, 2, Register::Size> src_size;
1202 BitField<13, 1, u64> is_input_signed;
1203 BitField<8, 2, Register::Size> dst_size;
1204 BitField<12, 1, u64> is_output_signed;
1205
1206 union {
1207 BitField<39, 2, u64> tab5cb8_2;
1208 } i2f;
1209
1210 union {
1211 BitField<39, 2, F2iRoundingOp> rounding;
1212 } f2i;
1213
1214 union {
1215 BitField<39, 4, u64> rounding;
1216 // H0, H1 extract for F16 missing
1217 BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value
1218 [[nodiscard]] F2fRoundingOp GetRoundingMode() const {
1219 constexpr u64 rounding_mask = 0x0B;
1220 return static_cast<F2fRoundingOp>(rounding.Value() & rounding_mask);
1221 }
1222 } f2f;
1223
1224 union {
1225 BitField<41, 2, u64> selector;
1226 } int_src;
1227
1228 union {
1229 BitField<41, 1, u64> selector;
1230 } float_src;
1231 } conversion;
1232
1233 union {
1234 BitField<28, 1, u64> array;
1235 BitField<29, 2, TextureType> texture_type;
1236 BitField<31, 4, u64> component_mask;
1237 BitField<49, 1, u64> nodep_flag;
1238 BitField<50, 1, u64> dc_flag;
1239 BitField<54, 1, u64> aoffi_flag;
1240 BitField<55, 3, TextureProcessMode> process_mode;
1241
1242 [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
1243 return ((1ULL << component) & component_mask) != 0;
1244 }
1245
1246 [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
1247 return process_mode;
1248 }
1249
1250 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1251 switch (mode) {
1252 case TextureMiscMode::DC:
1253 return dc_flag != 0;
1254 case TextureMiscMode::NODEP:
1255 return nodep_flag != 0;
1256 case TextureMiscMode::AOFFI:
1257 return aoffi_flag != 0;
1258 default:
1259 break;
1260 }
1261 return false;
1262 }
1263 } tex;
1264
1265 union {
1266 BitField<28, 1, u64> array;
1267 BitField<29, 2, TextureType> texture_type;
1268 BitField<31, 4, u64> component_mask;
1269 BitField<49, 1, u64> nodep_flag;
1270 BitField<50, 1, u64> dc_flag;
1271 BitField<36, 1, u64> aoffi_flag;
1272 BitField<37, 3, TextureProcessMode> process_mode;
1273
1274 [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
1275 return ((1ULL << component) & component_mask) != 0;
1276 }
1277
1278 [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
1279 return process_mode;
1280 }
1281
1282 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1283 switch (mode) {
1284 case TextureMiscMode::DC:
1285 return dc_flag != 0;
1286 case TextureMiscMode::NODEP:
1287 return nodep_flag != 0;
1288 case TextureMiscMode::AOFFI:
1289 return aoffi_flag != 0;
1290 default:
1291 break;
1292 }
1293 return false;
1294 }
1295 } tex_b;
1296
1297 union {
1298 BitField<22, 6, TextureQueryType> query_type;
1299 BitField<31, 4, u64> component_mask;
1300 BitField<49, 1, u64> nodep_flag;
1301
1302 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1303 switch (mode) {
1304 case TextureMiscMode::NODEP:
1305 return nodep_flag != 0;
1306 default:
1307 break;
1308 }
1309 return false;
1310 }
1311
1312 [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
1313 return ((1ULL << component) & component_mask) != 0;
1314 }
1315 } txq;
1316
1317 union {
1318 BitField<28, 1, u64> array;
1319 BitField<29, 2, TextureType> texture_type;
1320 BitField<31, 4, u64> component_mask;
1321 BitField<35, 1, u64> ndv_flag;
1322 BitField<49, 1, u64> nodep_flag;
1323
1324 [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
1325 return ((1ULL << component) & component_mask) != 0;
1326 }
1327
1328 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1329 switch (mode) {
1330 case TextureMiscMode::NDV:
1331 return (ndv_flag != 0);
1332 case TextureMiscMode::NODEP:
1333 return (nodep_flag != 0);
1334 default:
1335 break;
1336 }
1337 return false;
1338 }
1339 } tmml;
1340
1341 union {
1342 BitField<28, 1, u64> array;
1343 BitField<29, 2, TextureType> texture_type;
1344 BitField<35, 1, u64> ndv_flag;
1345 BitField<49, 1, u64> nodep_flag;
1346 BitField<50, 1, u64> dc_flag;
1347 BitField<54, 2, u64> offset_mode;
1348 BitField<56, 2, u64> component;
1349
1350 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1351 switch (mode) {
1352 case TextureMiscMode::NDV:
1353 return ndv_flag != 0;
1354 case TextureMiscMode::NODEP:
1355 return nodep_flag != 0;
1356 case TextureMiscMode::DC:
1357 return dc_flag != 0;
1358 case TextureMiscMode::AOFFI:
1359 return offset_mode == 1;
1360 case TextureMiscMode::PTP:
1361 return offset_mode == 2;
1362 default:
1363 break;
1364 }
1365 return false;
1366 }
1367 } tld4;
1368
1369 union {
1370 BitField<35, 1, u64> ndv_flag;
1371 BitField<49, 1, u64> nodep_flag;
1372 BitField<50, 1, u64> dc_flag;
1373 BitField<33, 2, u64> offset_mode;
1374 BitField<37, 2, u64> component;
1375
1376 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1377 switch (mode) {
1378 case TextureMiscMode::NDV:
1379 return ndv_flag != 0;
1380 case TextureMiscMode::NODEP:
1381 return nodep_flag != 0;
1382 case TextureMiscMode::DC:
1383 return dc_flag != 0;
1384 case TextureMiscMode::AOFFI:
1385 return offset_mode == 1;
1386 case TextureMiscMode::PTP:
1387 return offset_mode == 2;
1388 default:
1389 break;
1390 }
1391 return false;
1392 }
1393 } tld4_b;
1394
1395 union {
1396 BitField<49, 1, u64> nodep_flag;
1397 BitField<50, 1, u64> dc_flag;
1398 BitField<51, 1, u64> aoffi_flag;
1399 BitField<52, 2, u64> component;
1400 BitField<55, 1, u64> fp16_flag;
1401
1402 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1403 switch (mode) {
1404 case TextureMiscMode::DC:
1405 return dc_flag != 0;
1406 case TextureMiscMode::NODEP:
1407 return nodep_flag != 0;
1408 case TextureMiscMode::AOFFI:
1409 return aoffi_flag != 0;
1410 default:
1411 break;
1412 }
1413 return false;
1414 }
1415 } tld4s;
1416
1417 union {
1418 BitField<0, 8, Register> gpr0;
1419 BitField<28, 8, Register> gpr28;
1420 BitField<49, 1, u64> nodep_flag;
1421 BitField<50, 3, u64> component_mask_selector;
1422 BitField<53, 4, u64> texture_info;
1423 BitField<59, 1, u64> fp32_flag;
1424
1425 [[nodiscard]] TextureType GetTextureType() const {
1426 // The TEXS instruction has a weird encoding for the texture type.
1427 if (texture_info == 0) {
1428 return TextureType::Texture1D;
1429 }
1430 if (texture_info >= 1 && texture_info <= 9) {
1431 return TextureType::Texture2D;
1432 }
1433 if (texture_info >= 10 && texture_info <= 11) {
1434 return TextureType::Texture3D;
1435 }
1436 if (texture_info >= 12 && texture_info <= 13) {
1437 return TextureType::TextureCube;
1438 }
1439
1440 LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value());
1441 UNREACHABLE();
1442 return TextureType::Texture1D;
1443 }
1444
1445 [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
1446 switch (texture_info) {
1447 case 0:
1448 case 2:
1449 case 6:
1450 case 8:
1451 case 9:
1452 case 11:
1453 return TextureProcessMode::LZ;
1454 case 3:
1455 case 5:
1456 case 13:
1457 return TextureProcessMode::LL;
1458 default:
1459 break;
1460 }
1461 return TextureProcessMode::None;
1462 }
1463
1464 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1465 switch (mode) {
1466 case TextureMiscMode::DC:
1467 return (texture_info >= 4 && texture_info <= 6) || texture_info == 9;
1468 case TextureMiscMode::NODEP:
1469 return nodep_flag != 0;
1470 default:
1471 break;
1472 }
1473 return false;
1474 }
1475
1476 [[nodiscard]] bool IsArrayTexture() const {
1477 // TEXS only supports Texture2D arrays.
1478 return texture_info >= 7 && texture_info <= 9;
1479 }
1480
1481 [[nodiscard]] bool HasTwoDestinations() const {
1482 return gpr28.Value() != Register::ZeroIndex;
1483 }
1484
1485 [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
1486 static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{
1487 {},
1488 {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
1489 {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
1490 {0x7, 0xb, 0xd, 0xe, 0xf},
1491 }};
1492
1493 std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U};
1494 index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0;
1495
1496 u32 mask = mask_lut[index][component_mask_selector];
1497 // A mask of 0 means this instruction uses an unimplemented mask.
1498 ASSERT(mask != 0);
1499 return ((1ull << component) & mask) != 0;
1500 }
1501 } texs;
1502
1503 union {
1504 BitField<28, 1, u64> is_array;
1505 BitField<29, 2, TextureType> texture_type;
1506 BitField<35, 1, u64> aoffi;
1507 BitField<49, 1, u64> nodep_flag;
1508 BitField<50, 1, u64> ms; // Multisample?
1509 BitField<54, 1, u64> cl;
1510 BitField<55, 1, u64> process_mode;
1511
1512 [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
1513 return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL;
1514 }
1515 } tld;
1516
1517 union {
1518 BitField<49, 1, u64> nodep_flag;
1519 BitField<53, 4, u64> texture_info;
1520 BitField<59, 1, u64> fp32_flag;
1521
1522 [[nodiscard]] TextureType GetTextureType() const {
1523 // The TLDS instruction has a weird encoding for the texture type.
1524 if (texture_info <= 1) {
1525 return TextureType::Texture1D;
1526 }
1527 if (texture_info == 2 || texture_info == 8 || texture_info == 12 ||
1528 (texture_info >= 4 && texture_info <= 6)) {
1529 return TextureType::Texture2D;
1530 }
1531 if (texture_info == 7) {
1532 return TextureType::Texture3D;
1533 }
1534
1535 LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value());
1536 UNREACHABLE();
1537 return TextureType::Texture1D;
1538 }
1539
1540 [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
1541 if (texture_info == 1 || texture_info == 5 || texture_info == 12) {
1542 return TextureProcessMode::LL;
1543 }
1544 return TextureProcessMode::LZ;
1545 }
1546
1547 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1548 switch (mode) {
1549 case TextureMiscMode::AOFFI:
1550 return texture_info == 12 || texture_info == 4;
1551 case TextureMiscMode::MZ:
1552 return texture_info == 5;
1553 case TextureMiscMode::NODEP:
1554 return nodep_flag != 0;
1555 default:
1556 break;
1557 }
1558 return false;
1559 }
1560
1561 [[nodiscard]] bool IsArrayTexture() const {
1562 // TEXS only supports Texture2D arrays.
1563 return texture_info == 8;
1564 }
1565 } tlds;
1566
1567 union {
1568 BitField<28, 1, u64> is_array;
1569 BitField<29, 2, TextureType> texture_type;
1570 BitField<35, 1, u64> aoffi_flag;
1571 BitField<49, 1, u64> nodep_flag;
1572
1573 [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
1574 switch (mode) {
1575 case TextureMiscMode::AOFFI:
1576 return aoffi_flag != 0;
1577 case TextureMiscMode::NODEP:
1578 return nodep_flag != 0;
1579 default:
1580 break;
1581 }
1582 return false;
1583 }
1584
1585 } txd;
1586
1587 union {
1588 BitField<24, 2, StoreCacheManagement> cache_management;
1589 BitField<33, 3, ImageType> image_type;
1590 BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
1591 BitField<51, 1, u64> is_immediate;
1592 BitField<52, 1, SurfaceDataMode> mode;
1593
1594 BitField<20, 3, StoreType> store_data_layout;
1595 BitField<20, 4, u64> component_mask_selector;
1596
1597 [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
1598 ASSERT(mode == SurfaceDataMode::P);
1599 constexpr u8 R = 0b0001;
1600 constexpr u8 G = 0b0010;
1601 constexpr u8 B = 0b0100;
1602 constexpr u8 A = 0b1000;
1603 constexpr std::array<u8, 16> mask = {
1604 0, (R), (G), (R | G), (B), (R | B),
1605 (G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A),
1606 (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
1607 return std::bitset<4>{mask.at(component_mask_selector)}.test(component);
1608 }
1609
1610 [[nodiscard]] StoreType GetStoreDataLayout() const {
1611 ASSERT(mode == SurfaceDataMode::D_BA);
1612 return store_data_layout;
1613 }
1614 } suldst;
1615
1616 union {
1617 BitField<28, 1, u64> is_ba;
1618 BitField<51, 3, ImageAtomicOperationType> operation_type;
1619 BitField<33, 3, ImageType> image_type;
1620 BitField<29, 4, ImageAtomicOperation> operation;
1621 BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
1622 } suatom_d;
1623
1624 union {
1625 BitField<20, 24, u64> target;
1626 BitField<5, 1, u64> constant_buffer;
1627
1628 [[nodiscard]] s32 GetBranchTarget() const {
1629 // Sign extend the branch target offset
1630 const auto mask = 1U << (24 - 1);
1631 const auto target_value = static_cast<u32>(target);
1632 constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction));
1633
1634 // The branch offset is relative to the next instruction and is stored in bytes, so
1635 // divide it by the size of an instruction and add 1 to it.
1636 return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1;
1637 }
1638 } bra;
1639
1640 union {
1641 BitField<20, 24, u64> target;
1642 BitField<5, 1, u64> constant_buffer;
1643
1644 [[nodiscard]] s32 GetBranchExtend() const {
1645 // Sign extend the branch target offset
1646 const auto mask = 1U << (24 - 1);
1647 const auto target_value = static_cast<u32>(target);
1648 constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction));
1649
1650 // The branch offset is relative to the next instruction and is stored in bytes, so
1651 // divide it by the size of an instruction and add 1 to it.
1652 return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1;
1653 }
1654 } brx;
1655
1656 union {
1657 BitField<39, 1, u64> emit; // EmitVertex
1658 BitField<40, 1, u64> cut; // EndPrimitive
1659 } out;
1660
1661 union {
1662 BitField<31, 1, u64> skew;
1663 BitField<32, 1, u64> o;
1664 BitField<33, 2, IsberdMode> mode;
1665 BitField<47, 2, IsberdShift> shift;
1666 } isberd;
1667
1668 union {
1669 BitField<8, 2, MembarType> type;
1670 BitField<0, 2, MembarUnknown> unknown;
1671 } membar;
1672
1673 union {
1674 BitField<48, 1, u64> signed_a;
1675 BitField<38, 1, u64> is_byte_chunk_a;
1676 BitField<36, 2, VideoType> type_a;
1677 BitField<36, 2, u64> byte_height_a;
1678
1679 BitField<49, 1, u64> signed_b;
1680 BitField<50, 1, u64> use_register_b;
1681 BitField<30, 1, u64> is_byte_chunk_b;
1682 BitField<28, 2, VideoType> type_b;
1683 BitField<28, 2, u64> byte_height_b;
1684 } video;
1685
1686 union {
1687 BitField<51, 2, VmadShr> shr;
1688 BitField<55, 1, u64> saturate; // Saturates the result (a * b + c)
1689 BitField<47, 1, u64> cc;
1690 } vmad;
1691
1692 union {
1693 BitField<54, 1, u64> is_dest_signed;
1694 BitField<48, 1, u64> is_src_a_signed;
1695 BitField<49, 1, u64> is_src_b_signed;
1696 BitField<37, 2, u64> src_format_a;
1697 BitField<29, 2, u64> src_format_b;
1698 BitField<56, 1, u64> mx;
1699 BitField<55, 1, u64> sat;
1700 BitField<36, 2, u64> selector_a;
1701 BitField<28, 2, u64> selector_b;
1702 BitField<50, 1, u64> is_op_b_register;
1703 BitField<51, 3, VmnmxOperation> operation;
1704
1705 [[nodiscard]] VmnmxType SourceFormatA() const {
1706 switch (src_format_a) {
1707 case 0b11:
1708 return VmnmxType::Bits32;
1709 case 0b10:
1710 return VmnmxType::Bits16;
1711 default:
1712 return VmnmxType::Bits8;
1713 }
1714 }
1715
1716 [[nodiscard]] VmnmxType SourceFormatB() const {
1717 switch (src_format_b) {
1718 case 0b11:
1719 return VmnmxType::Bits32;
1720 case 0b10:
1721 return VmnmxType::Bits16;
1722 default:
1723 return VmnmxType::Bits8;
1724 }
1725 }
1726 } vmnmx;
1727
1728 union {
1729 BitField<20, 16, u64> imm20_16;
1730 BitField<35, 1, u64> high_b_rr; // used on RR
1731 BitField<36, 1, u64> product_shift_left;
1732 BitField<37, 1, u64> merge_37;
1733 BitField<48, 1, u64> sign_a;
1734 BitField<49, 1, u64> sign_b;
1735 BitField<50, 2, XmadMode> mode_cbf; // used by CR, RC
1736 BitField<50, 3, XmadMode> mode;
1737 BitField<52, 1, u64> high_b;
1738 BitField<53, 1, u64> high_a;
1739 BitField<55, 1, u64> product_shift_left_second; // used on CR
1740 BitField<56, 1, u64> merge_56;
1741 } xmad;
1742
1743 union {
1744 BitField<20, 14, u64> shifted_offset;
1745 BitField<34, 5, u64> index;
1746
1747 [[nodiscard]] u64 GetOffset() const {
1748 return shifted_offset * 4;
1749 }
1750 } cbuf34;
1751
1752 union {
1753 BitField<20, 16, s64> offset;
1754 BitField<36, 5, u64> index;
1755
1756 [[nodiscard]] s64 GetOffset() const {
1757 return offset;
1758 }
1759 } cbuf36;
1760
1761 // Unsure about the size of this one.
1762 // It's always used with a gpr0, so any size should be fine.
1763 BitField<20, 8, SystemVariable> sys20;
1764
1765 BitField<47, 1, u64> generates_cc;
1766 BitField<61, 1, u64> is_b_imm;
1767 BitField<60, 1, u64> is_b_gpr;
1768 BitField<59, 1, u64> is_c_gpr;
1769 BitField<20, 24, s64> smem_imm;
1770 BitField<0, 5, ConditionCode> flow_condition_code;
1771
1772 Attribute attribute;
1773 Sampler sampler;
1774 Image image;
1775
1776 u64 value;
1777};
1778static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size");
1779static_assert(std::is_standard_layout_v<Instruction>, "Instruction is not standard layout");
1780
1781class OpCode {
1782public:
1783 enum class Id {
1784 KIL,
1785 SSY,
1786 SYNC,
1787 BRK,
1788 DEPBAR,
1789 VOTE,
1790 VOTE_VTG,
1791 SHFL,
1792 FSWZADD,
1793 BFE_C,
1794 BFE_R,
1795 BFE_IMM,
1796 BFI_RC,
1797 BFI_IMM_R,
1798 BRA,
1799 BRX,
1800 PBK,
1801 LD_A,
1802 LD_L,
1803 LD_S,
1804 LD_C,
1805 LD, // Load from generic memory
1806 LDG, // Load from global memory
1807 ST_A,
1808 ST_L,
1809 ST_S,
1810 ST, // Store in generic memory
1811 STG, // Store in global memory
1812 RED, // Reduction operation
1813 ATOM, // Atomic operation on global memory
1814 ATOMS, // Atomic operation on shared memory
1815 AL2P, // Transforms attribute memory into physical memory
1816 TEX,
1817 TEX_B, // Texture Load Bindless
1818 TXQ, // Texture Query
1819 TXQ_B, // Texture Query Bindless
1820 TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
1821 TLD, // Texture Load
1822 TLDS, // Texture Load with scalar/non-vec4 source/destinations
1823 TLD4, // Texture Gather 4
1824 TLD4_B, // Texture Gather 4 Bindless
1825 TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations
1826 TMML_B, // Texture Mip Map Level
1827 TMML, // Texture Mip Map Level
1828 TXD, // Texture Gradient/Load with Derivates
1829 TXD_B, // Texture Gradient/Load with Derivates Bindless
1830 SUST, // Surface Store
1831 SULD, // Surface Load
1832 SUATOM, // Surface Atomic Operation
1833 EXIT,
1834 NOP,
1835 IPA,
1836 OUT_R, // Emit vertex/primitive
1837 ISBERD,
1838 BAR,
1839 MEMBAR,
1840 VMAD,
1841 VSETP,
1842 VMNMX,
1843 FFMA_IMM, // Fused Multiply and Add
1844 FFMA_CR,
1845 FFMA_RC,
1846 FFMA_RR,
1847 FADD_C,
1848 FADD_R,
1849 FADD_IMM,
1850 FADD32I,
1851 FMUL_C,
1852 FMUL_R,
1853 FMUL_IMM,
1854 FMUL32_IMM,
1855 IADD_C,
1856 IADD_R,
1857 IADD_IMM,
1858 IADD3_C, // Add 3 Integers
1859 IADD3_R,
1860 IADD3_IMM,
1861 IADD32I,
1862 ISCADD_C, // Scale and Add
1863 ISCADD_R,
1864 ISCADD_IMM,
1865 FLO_R,
1866 FLO_C,
1867 FLO_IMM,
1868 LEA_R1,
1869 LEA_R2,
1870 LEA_RZ,
1871 LEA_IMM,
1872 LEA_HI,
1873 HADD2_C,
1874 HADD2_R,
1875 HADD2_IMM,
1876 HMUL2_C,
1877 HMUL2_R,
1878 HMUL2_IMM,
1879 HFMA2_CR,
1880 HFMA2_RC,
1881 HFMA2_RR,
1882 HFMA2_IMM_R,
1883 HSETP2_C,
1884 HSETP2_R,
1885 HSETP2_IMM,
1886 HSET2_C,
1887 HSET2_R,
1888 HSET2_IMM,
1889 POPC_C,
1890 POPC_R,
1891 POPC_IMM,
1892 SEL_C,
1893 SEL_R,
1894 SEL_IMM,
1895 ICMP_RC,
1896 ICMP_R,
1897 ICMP_CR,
1898 ICMP_IMM,
1899 FCMP_RR,
1900 FCMP_RC,
1901 FCMP_IMMR,
1902 MUFU, // Multi-Function Operator
1903 RRO_C, // Range Reduction Operator
1904 RRO_R,
1905 RRO_IMM,
1906 F2F_C,
1907 F2F_R,
1908 F2F_IMM,
1909 F2I_C,
1910 F2I_R,
1911 F2I_IMM,
1912 I2F_C,
1913 I2F_R,
1914 I2F_IMM,
1915 I2I_C,
1916 I2I_R,
1917 I2I_IMM,
1918 LOP_C,
1919 LOP_R,
1920 LOP_IMM,
1921 LOP32I,
1922 LOP3_C,
1923 LOP3_R,
1924 LOP3_IMM,
1925 MOV_C,
1926 MOV_R,
1927 MOV_IMM,
1928 S2R,
1929 MOV32_IMM,
1930 SHL_C,
1931 SHL_R,
1932 SHL_IMM,
1933 SHR_C,
1934 SHR_R,
1935 SHR_IMM,
1936 SHF_RIGHT_R,
1937 SHF_RIGHT_IMM,
1938 SHF_LEFT_R,
1939 SHF_LEFT_IMM,
1940 FMNMX_C,
1941 FMNMX_R,
1942 FMNMX_IMM,
1943 IMNMX_C,
1944 IMNMX_R,
1945 IMNMX_IMM,
1946 FSETP_C, // Set Predicate
1947 FSETP_R,
1948 FSETP_IMM,
1949 FSET_C,
1950 FSET_R,
1951 FSET_IMM,
1952 ISETP_C,
1953 ISETP_IMM,
1954 ISETP_R,
1955 ISET_R,
1956 ISET_C,
1957 ISET_IMM,
1958 PSETP,
1959 PSET,
1960 CSETP,
1961 R2P_IMM,
1962 P2R_IMM,
1963 XMAD_IMM,
1964 XMAD_CR,
1965 XMAD_RC,
1966 XMAD_RR,
1967 };
1968
1969 enum class Type {
1970 Trivial,
1971 Arithmetic,
1972 ArithmeticImmediate,
1973 ArithmeticInteger,
1974 ArithmeticIntegerImmediate,
1975 ArithmeticHalf,
1976 ArithmeticHalfImmediate,
1977 Bfe,
1978 Bfi,
1979 Shift,
1980 Ffma,
1981 Hfma2,
1982 Flow,
1983 Synch,
1984 Warp,
1985 Memory,
1986 Texture,
1987 Image,
1988 FloatSet,
1989 FloatSetPredicate,
1990 IntegerSet,
1991 IntegerSetPredicate,
1992 HalfSet,
1993 HalfSetPredicate,
1994 PredicateSetPredicate,
1995 PredicateSetRegister,
1996 RegisterSetPredicate,
1997 Conversion,
1998 Video,
1999 Xmad,
2000 Unknown,
2001 };
2002
2003 /// Returns whether an opcode has an execution predicate field or not (ie, whether it can be
2004 /// conditionally executed).
2005 [[nodiscard]] static bool IsPredicatedInstruction(Id opcode) {
2006 // TODO(Subv): Add the rest of unpredicated instructions.
2007 return opcode != Id::SSY && opcode != Id::PBK;
2008 }
2009
2010 class Matcher {
2011 public:
2012 constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_)
2013 : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {}
2014
2015 [[nodiscard]] constexpr const char* GetName() const {
2016 return name;
2017 }
2018
2019 [[nodiscard]] constexpr u16 GetMask() const {
2020 return mask;
2021 }
2022
2023 [[nodiscard]] constexpr Id GetId() const {
2024 return id;
2025 }
2026
2027 [[nodiscard]] constexpr Type GetType() const {
2028 return type;
2029 }
2030
2031 /**
2032 * Tests to see if the given instruction is the instruction this matcher represents.
2033 * @param instruction The instruction to test
2034 * @returns true if the given instruction matches.
2035 */
2036 [[nodiscard]] constexpr bool Matches(u16 instruction) const {
2037 return (instruction & mask) == expected;
2038 }
2039
2040 private:
2041 const char* name;
2042 u16 mask;
2043 u16 expected;
2044 Id id;
2045 Type type;
2046 };
2047
2048 using DecodeResult = std::optional<std::reference_wrapper<const Matcher>>;
2049 [[nodiscard]] static DecodeResult Decode(Instruction instr) {
2050 static const auto table{GetDecodeTable()};
2051
2052 const auto matches_instruction = [instr](const auto& matcher) {
2053 return matcher.Matches(static_cast<u16>(instr.opcode));
2054 };
2055
2056 auto iter = std::find_if(table.begin(), table.end(), matches_instruction);
2057 return iter != table.end() ? std::optional<std::reference_wrapper<const Matcher>>(*iter)
2058 : std::nullopt;
2059 }
2060
2061private:
2062 struct Detail {
2063 private:
2064 static constexpr std::size_t opcode_bitsize = 16;
2065
2066 /**
2067 * Generates the mask and the expected value after masking from a given bitstring.
2068 * A '0' in a bitstring indicates that a zero must be present at that bit position.
2069 * A '1' in a bitstring indicates that a one must be present at that bit position.
2070 */
2071 [[nodiscard]] static constexpr auto GetMaskAndExpect(const char* const bitstring) {
2072 u16 mask = 0, expect = 0;
2073 for (std::size_t i = 0; i < opcode_bitsize; i++) {
2074 const std::size_t bit_position = opcode_bitsize - i - 1;
2075 switch (bitstring[i]) {
2076 case '0':
2077 mask |= static_cast<u16>(1U << bit_position);
2078 break;
2079 case '1':
2080 expect |= static_cast<u16>(1U << bit_position);
2081 mask |= static_cast<u16>(1U << bit_position);
2082 break;
2083 default:
2084 // Ignore
2085 break;
2086 }
2087 }
2088 return std::make_pair(mask, expect);
2089 }
2090
2091 public:
2092 /// Creates a matcher that can match and parse instructions based on bitstring.
2093 [[nodiscard]] static constexpr auto GetMatcher(const char* const bitstring, Id op,
2094 Type type, const char* const name) {
2095 const auto [mask, expected] = GetMaskAndExpect(bitstring);
2096 return Matcher(name, mask, expected, op, type);
2097 }
2098 };
2099
2100 [[nodiscard]] static std::vector<Matcher> GetDecodeTable() {
2101 std::vector<Matcher> table = {
2102#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name)
2103 INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
2104 INST("111000101001----", Id::SSY, Type::Flow, "SSY"),
2105 INST("111000101010----", Id::PBK, Type::Flow, "PBK"),
2106 INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
2107 INST("111000100101----", Id::BRX, Type::Flow, "BRX"),
2108 INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"),
2109 INST("111000110100----", Id::BRK, Type::Flow, "BRK"),
2110 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
2111 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
2112 INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
2113 INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"),
2114 INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
2115 INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"),
2116 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
2117 INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
2118 INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
2119 INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
2120 INST("100-------------", Id::LD, Type::Memory, "LD"),
2121 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
2122 INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
2123 INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"),
2124 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
2125 INST("101-------------", Id::ST, Type::Memory, "ST"),
2126 INST("1110111011011---", Id::STG, Type::Memory, "STG"),
2127 INST("1110101111111---", Id::RED, Type::Memory, "RED"),
2128 INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
2129 INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
2130 INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
2131 INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
2132 INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
2133 INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
2134 INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"),
2135 INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
2136 INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
2137 INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
2138 INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
2139 INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"),
2140 INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"),
2141 INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
2142 INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
2143 INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"),
2144 INST("11011110001110--", Id::TXD, Type::Texture, "TXD"),
2145 INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
2146 INST("11101011000-----", Id::SULD, Type::Image, "SULD"),
2147 INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"),
2148 INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"),
2149 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
2150 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
2151 INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
2152 INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"),
2153 INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
2154 INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
2155 INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
2156 INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"),
2157 INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
2158 INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
2159 INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
2160 INST("010110011-------", Id::FFMA_RR, Type::Ffma, "FFMA_RR"),
2161 INST("0100110001011---", Id::FADD_C, Type::Arithmetic, "FADD_C"),
2162 INST("0101110001011---", Id::FADD_R, Type::Arithmetic, "FADD_R"),
2163 INST("0011100-01011---", Id::FADD_IMM, Type::Arithmetic, "FADD_IMM"),
2164 INST("000010----------", Id::FADD32I, Type::ArithmeticImmediate, "FADD32I"),
2165 INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"),
2166 INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"),
2167 INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"),
2168 INST("00011110--------", Id::FMUL32_IMM, Type::ArithmeticImmediate, "FMUL32_IMM"),
2169 INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"),
2170 INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"),
2171 INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"),
2172 INST("010011001100----", Id::IADD3_C, Type::ArithmeticInteger, "IADD3_C"),
2173 INST("010111001100----", Id::IADD3_R, Type::ArithmeticInteger, "IADD3_R"),
2174 INST("0011100-1100----", Id::IADD3_IMM, Type::ArithmeticInteger, "IADD3_IMM"),
2175 INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"),
2176 INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"),
2177 INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"),
2178 INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"),
2179 INST("0100110000001---", Id::POPC_C, Type::ArithmeticInteger, "POPC_C"),
2180 INST("0101110000001---", Id::POPC_R, Type::ArithmeticInteger, "POPC_R"),
2181 INST("0011100-00001---", Id::POPC_IMM, Type::ArithmeticInteger, "POPC_IMM"),
2182 INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"),
2183 INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"),
2184 INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"),
2185 INST("010100110100----", Id::ICMP_RC, Type::ArithmeticInteger, "ICMP_RC"),
2186 INST("010110110100----", Id::ICMP_R, Type::ArithmeticInteger, "ICMP_R"),
2187 INST("010010110100----", Id::ICMP_CR, Type::ArithmeticInteger, "ICMP_CR"),
2188 INST("0011011-0100----", Id::ICMP_IMM, Type::ArithmeticInteger, "ICMP_IMM"),
2189 INST("0101110000110---", Id::FLO_R, Type::ArithmeticInteger, "FLO_R"),
2190 INST("0100110000110---", Id::FLO_C, Type::ArithmeticInteger, "FLO_C"),
2191 INST("0011100-00110---", Id::FLO_IMM, Type::ArithmeticInteger, "FLO_IMM"),
2192 INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"),
2193 INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"),
2194 INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"),
2195 INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"),
2196 INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"),
2197 INST("0111101-1-------", Id::HADD2_C, Type::ArithmeticHalf, "HADD2_C"),
2198 INST("0101110100010---", Id::HADD2_R, Type::ArithmeticHalf, "HADD2_R"),
2199 INST("0111101-0-------", Id::HADD2_IMM, Type::ArithmeticHalfImmediate, "HADD2_IMM"),
2200 INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"),
2201 INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"),
2202 INST("0111100-0-------", Id::HMUL2_IMM, Type::ArithmeticHalfImmediate, "HMUL2_IMM"),
2203 INST("01110---1-------", Id::HFMA2_CR, Type::Hfma2, "HFMA2_CR"),
2204 INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"),
2205 INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"),
2206 INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"),
2207 INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
2208 INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
2209 INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
2210 INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"),
2211 INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
2212 INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
2213 INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
2214 INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
2215 INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"),
2216 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
2217 INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
2218 INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
2219 INST("0011100-10010---", Id::RRO_IMM, Type::Arithmetic, "RRO_IMM"),
2220 INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"),
2221 INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"),
2222 INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"),
2223 INST("0100110010110---", Id::F2I_C, Type::Conversion, "F2I_C"),
2224 INST("0101110010110---", Id::F2I_R, Type::Conversion, "F2I_R"),
2225 INST("0011100-10110---", Id::F2I_IMM, Type::Conversion, "F2I_IMM"),
2226 INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
2227 INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
2228 INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
2229 INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"),
2230 INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"),
2231 INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
2232 INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
2233 INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"),
2234 INST("0100110000100---", Id::IMNMX_C, Type::ArithmeticInteger, "IMNMX_C"),
2235 INST("0101110000100---", Id::IMNMX_R, Type::ArithmeticInteger, "IMNMX_R"),
2236 INST("0011100-00100---", Id::IMNMX_IMM, Type::ArithmeticInteger, "IMNMX_IMM"),
2237 INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"),
2238 INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"),
2239 INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"),
2240 INST("0101001111110---", Id::BFI_RC, Type::Bfi, "BFI_RC"),
2241 INST("0011011-11110---", Id::BFI_IMM_R, Type::Bfi, "BFI_IMM_R"),
2242 INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
2243 INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
2244 INST("0011100-01000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
2245 INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"),
2246 INST("0000001---------", Id::LOP3_C, Type::ArithmeticInteger, "LOP3_C"),
2247 INST("0101101111100---", Id::LOP3_R, Type::ArithmeticInteger, "LOP3_R"),
2248 INST("0011110---------", Id::LOP3_IMM, Type::ArithmeticInteger, "LOP3_IMM"),
2249 INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"),
2250 INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"),
2251 INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"),
2252 INST("0100110000101---", Id::SHR_C, Type::Shift, "SHR_C"),
2253 INST("0101110000101---", Id::SHR_R, Type::Shift, "SHR_R"),
2254 INST("0011100-00101---", Id::SHR_IMM, Type::Shift, "SHR_IMM"),
2255 INST("0101110011111---", Id::SHF_RIGHT_R, Type::Shift, "SHF_RIGHT_R"),
2256 INST("0011100-11111---", Id::SHF_RIGHT_IMM, Type::Shift, "SHF_RIGHT_IMM"),
2257 INST("0101101111111---", Id::SHF_LEFT_R, Type::Shift, "SHF_LEFT_R"),
2258 INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"),
2259 INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"),
2260 INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"),
2261 INST("0011100-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
2262 INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"),
2263 INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"),
2264 INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"),
2265 INST("01011000--------", Id::FSET_R, Type::FloatSet, "FSET_R"),
2266 INST("0100100---------", Id::FSET_C, Type::FloatSet, "FSET_C"),
2267 INST("0011000---------", Id::FSET_IMM, Type::FloatSet, "FSET_IMM"),
2268 INST("010010111011----", Id::FSETP_C, Type::FloatSetPredicate, "FSETP_C"),
2269 INST("010110111011----", Id::FSETP_R, Type::FloatSetPredicate, "FSETP_R"),
2270 INST("0011011-1011----", Id::FSETP_IMM, Type::FloatSetPredicate, "FSETP_IMM"),
2271 INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"),
2272 INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"),
2273 INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"),
2274 INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"),
2275 INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"),
2276 INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
2277 INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"),
2278 INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
2279 INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"),
2280 INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"),
2281 INST("0011100-11101---", Id::P2R_IMM, Type::RegisterSetPredicate, "P2R_IMM"),
2282 INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
2283 INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
2284 INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),
2285 INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"),
2286 };
2287#undef INST
2288 std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) {
2289 // If a matcher has more bits in its mask it is more specific, so it
2290 // should come first.
2291 return std::bitset<16>(a.GetMask()).count() > std::bitset<16>(b.GetMask()).count();
2292 });
2293
2294 return table;
2295 }
2296};
2297
2298} // namespace Tegra::Shader
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
deleted file mode 100644
index e0d7b89c5..000000000
--- a/src/video_core/engines/shader_header.h
+++ /dev/null
@@ -1,158 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <optional>
9
10#include "common/bit_field.h"
11#include "common/common_funcs.h"
12#include "common/common_types.h"
13
14namespace Tegra::Shader {
15
16enum class OutputTopology : u32 {
17 PointList = 1,
18 LineStrip = 6,
19 TriangleStrip = 7,
20};
21
22enum class PixelImap : u8 {
23 Unused = 0,
24 Constant = 1,
25 Perspective = 2,
26 ScreenLinear = 3,
27};
28
29// Documentation in:
30// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
31struct Header {
32 union {
33 BitField<0, 5, u32> sph_type;
34 BitField<5, 5, u32> version;
35 BitField<10, 4, u32> shader_type;
36 BitField<14, 1, u32> mrt_enable;
37 BitField<15, 1, u32> kills_pixels;
38 BitField<16, 1, u32> does_global_store;
39 BitField<17, 4, u32> sass_version;
40 BitField<21, 5, u32> reserved;
41 BitField<26, 1, u32> does_load_or_store;
42 BitField<27, 1, u32> does_fp64;
43 BitField<28, 4, u32> stream_out_mask;
44 } common0;
45
46 union {
47 BitField<0, 24, u32> shader_local_memory_low_size;
48 BitField<24, 8, u32> per_patch_attribute_count;
49 } common1;
50
51 union {
52 BitField<0, 24, u32> shader_local_memory_high_size;
53 BitField<24, 8, u32> threads_per_input_primitive;
54 } common2;
55
56 union {
57 BitField<0, 24, u32> shader_local_memory_crs_size;
58 BitField<24, 4, OutputTopology> output_topology;
59 BitField<28, 4, u32> reserved;
60 } common3;
61
62 union {
63 BitField<0, 12, u32> max_output_vertices;
64 BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
65 BitField<20, 4, u32> reserved;
66 BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
67 } common4;
68
69 union {
70 struct {
71 INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
72 INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
73 INSERT_PADDING_BYTES_NOINIT(16); // ImapGenericVector[32]
74 INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
75 union {
76 BitField<0, 8, u16> clip_distances;
77 BitField<8, 1, u16> point_sprite_s;
78 BitField<9, 1, u16> point_sprite_t;
79 BitField<10, 1, u16> fog_coordinate;
80 BitField<12, 1, u16> tessellation_eval_point_u;
81 BitField<13, 1, u16> tessellation_eval_point_v;
82 BitField<14, 1, u16> instance_id;
83 BitField<15, 1, u16> vertex_id;
84 };
85 INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10]
86 INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved
87 INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA
88 INSERT_PADDING_BYTES_NOINIT(1); // OmapSystemValuesB
89 INSERT_PADDING_BYTES_NOINIT(16); // OmapGenericVector[32]
90 INSERT_PADDING_BYTES_NOINIT(2); // OmapColor
91 INSERT_PADDING_BYTES_NOINIT(2); // OmapSystemValuesC
92 INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10]
93 INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved
94 } vtg;
95
96 struct {
97 INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA
98 INSERT_PADDING_BYTES_NOINIT(1); // ImapSystemValuesB
99
100 union {
101 BitField<0, 2, PixelImap> x;
102 BitField<2, 2, PixelImap> y;
103 BitField<4, 2, PixelImap> z;
104 BitField<6, 2, PixelImap> w;
105 u8 raw;
106 } imap_generic_vector[32];
107
108 INSERT_PADDING_BYTES_NOINIT(2); // ImapColor
109 INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC
110 INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10]
111 INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved
112
113 struct {
114 u32 target;
115 union {
116 BitField<0, 1, u32> sample_mask;
117 BitField<1, 1, u32> depth;
118 BitField<2, 30, u32> reserved;
119 };
120 } omap;
121
122 bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
123 const u32 bit = render_target * 4 + component;
124 return omap.target & (1 << bit);
125 }
126
127 PixelImap GetPixelImap(u32 attribute) const {
128 const auto get_index = [this, attribute](u32 index) {
129 return static_cast<PixelImap>(
130 (imap_generic_vector[attribute].raw >> (index * 2)) & 3);
131 };
132
133 std::optional<PixelImap> result;
134 for (u32 component = 0; component < 4; ++component) {
135 const PixelImap index = get_index(component);
136 if (index == PixelImap::Unused) {
137 continue;
138 }
139 if (result && result != index) {
140 LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode");
141 }
142 result = index;
143 }
144 return result.value_or(PixelImap::Unused);
145 }
146 } ps;
147
148 std::array<u32, 0xF> raw;
149 };
150
151 u64 GetLocalMemorySize() const {
152 return (common1.shader_local_memory_low_size |
153 (common2.shader_local_memory_high_size << 24));
154 }
155};
156static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
157
158} // namespace Tegra::Shader
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 7a3660496..588ce6139 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -4,6 +4,9 @@
4 4
5#include <vector> 5#include <vector>
6 6
7#include <boost/container/small_vector.hpp>
8
9#include "video_core/renderer_vulkan/vk_buffer_cache.h"
7#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 10#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
8#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 11#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
9#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 12#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
@@ -13,9 +16,142 @@
13#include "video_core/vulkan_common/vulkan_wrapper.h" 16#include "video_core/vulkan_common/vulkan_wrapper.h"
14 17
15namespace Vulkan { 18namespace Vulkan {
19namespace {
20vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Shader::Info& info) {
21 boost::container::small_vector<VkDescriptorSetLayoutBinding, 24> bindings;
22 u32 binding{};
23 for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) {
24 bindings.push_back({
25 .binding = binding,
26 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
27 .descriptorCount = 1,
28 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
29 .pImmutableSamplers = nullptr,
30 });
31 ++binding;
32 }
33 for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) {
34 bindings.push_back({
35 .binding = binding,
36 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
37 .descriptorCount = 1,
38 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
39 .pImmutableSamplers = nullptr,
40 });
41 ++binding;
42 }
43 return device.GetLogical().CreateDescriptorSetLayout({
44 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
45 .pNext = nullptr,
46 .flags = 0,
47 .bindingCount = static_cast<u32>(bindings.size()),
48 .pBindings = bindings.data(),
49 });
50}
51
52vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate(
53 const Device& device, const Shader::Info& info, VkDescriptorSetLayout descriptor_set_layout,
54 VkPipelineLayout pipeline_layout) {
55 boost::container::small_vector<VkDescriptorUpdateTemplateEntry, 24> entries;
56 size_t offset{};
57 u32 binding{};
58 for ([[maybe_unused]] const auto& desc : info.constant_buffer_descriptors) {
59 entries.push_back({
60 .dstBinding = binding,
61 .dstArrayElement = 0,
62 .descriptorCount = 1,
63 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
64 .offset = offset,
65 .stride = sizeof(DescriptorUpdateEntry),
66 });
67 ++binding;
68 offset += sizeof(DescriptorUpdateEntry);
69 }
70 for ([[maybe_unused]] const auto& desc : info.storage_buffers_descriptors) {
71 entries.push_back({
72 .dstBinding = binding,
73 .dstArrayElement = 0,
74 .descriptorCount = 1,
75 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
76 .offset = offset,
77 .stride = sizeof(DescriptorUpdateEntry),
78 });
79 ++binding;
80 offset += sizeof(DescriptorUpdateEntry);
81 }
82 return device.GetLogical().CreateDescriptorUpdateTemplateKHR({
83 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO,
84 .pNext = nullptr,
85 .flags = 0,
86 .descriptorUpdateEntryCount = static_cast<u32>(entries.size()),
87 .pDescriptorUpdateEntries = entries.data(),
88 .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET,
89 .descriptorSetLayout = descriptor_set_layout,
90 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_COMPUTE,
91 .pipelineLayout = pipeline_layout,
92 .set = 0,
93 });
94}
95} // Anonymous namespace
96
97ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool,
98 VKUpdateDescriptorQueue& update_descriptor_queue_,
99 const Shader::Info& info_, vk::ShaderModule spv_module_)
100 : update_descriptor_queue{&update_descriptor_queue_}, info{info_},
101 spv_module(std::move(spv_module_)),
102 descriptor_set_layout(CreateDescriptorSetLayout(device, info)),
103 descriptor_allocator(descriptor_pool, *descriptor_set_layout),
104 pipeline_layout{device.GetLogical().CreatePipelineLayout({
105 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
106 .pNext = nullptr,
107 .flags = 0,
108 .setLayoutCount = 1,
109 .pSetLayouts = descriptor_set_layout.address(),
110 .pushConstantRangeCount = 0,
111 .pPushConstantRanges = nullptr,
112 })},
113 descriptor_update_template{
114 CreateDescriptorUpdateTemplate(device, info, *descriptor_set_layout, *pipeline_layout)},
115 pipeline{device.GetLogical().CreateComputePipeline({
116 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
117 .pNext = nullptr,
118 .flags = 0,
119 .stage{
120 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
121 .pNext = nullptr,
122 .flags = 0,
123 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
124 .module = *spv_module,
125 .pName = "main",
126 .pSpecializationInfo = nullptr,
127 },
128 .layout = *pipeline_layout,
129 .basePipelineHandle = 0,
130 .basePipelineIndex = 0,
131 })} {}
132
133void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) {
134 u32 enabled_uniforms{};
135 for (const auto& desc : info.constant_buffer_descriptors) {
136 enabled_uniforms |= ((1ULL << desc.count) - 1) << desc.index;
137 }
138 buffer_cache.SetEnabledComputeUniformBuffers(enabled_uniforms);
16 139
17ComputePipeline::ComputePipeline() = default; 140 buffer_cache.UnbindComputeStorageBuffers();
141 size_t index{};
142 for (const auto& desc : info.storage_buffers_descriptors) {
143 ASSERT(desc.count == 1);
144 buffer_cache.BindComputeStorageBuffer(index, desc.cbuf_index, desc.cbuf_offset, true);
145 ++index;
146 }
147 buffer_cache.UpdateComputeBuffers();
148 buffer_cache.BindHostComputeBuffers();
149}
18 150
19ComputePipeline::~ComputePipeline() = default; 151VkDescriptorSet ComputePipeline::UpdateDescriptorSet() {
152 const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
153 update_descriptor_queue->Send(*descriptor_update_template, descriptor_set);
154 return descriptor_set;
155}
20 156
21} // namespace Vulkan 157} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 433d8bb3d..dc045d524 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -5,19 +5,52 @@
5#pragma once 5#pragma once
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "shader_recompiler/shader_info.h"
9#include "video_core/renderer_vulkan/vk_buffer_cache.h"
8#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 10#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
11#include "video_core/renderer_vulkan/vk_pipeline.h"
12#include "video_core/renderer_vulkan/vk_update_descriptor.h"
9#include "video_core/vulkan_common/vulkan_wrapper.h" 13#include "video_core/vulkan_common/vulkan_wrapper.h"
10 14
11namespace Vulkan { 15namespace Vulkan {
12 16
13class Device; 17class Device;
14class VKScheduler;
15class VKUpdateDescriptorQueue;
16 18
17class ComputePipeline { 19class ComputePipeline : public Pipeline {
18public: 20public:
19 explicit ComputePipeline(); 21 explicit ComputePipeline() = default;
20 ~ComputePipeline(); 22 explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool,
23 VKUpdateDescriptorQueue& update_descriptor_queue,
24 const Shader::Info& info, vk::ShaderModule spv_module);
25
26 ComputePipeline& operator=(ComputePipeline&&) noexcept = default;
27 ComputePipeline(ComputePipeline&&) noexcept = default;
28
29 ComputePipeline& operator=(const ComputePipeline&) = delete;
30 ComputePipeline(const ComputePipeline&) = delete;
31
32 void ConfigureBufferCache(BufferCache& buffer_cache);
33
34 [[nodiscard]] VkDescriptorSet UpdateDescriptorSet();
35
36 [[nodiscard]] VkPipeline Handle() const noexcept {
37 return *pipeline;
38 }
39
40 [[nodiscard]] VkPipelineLayout PipelineLayout() const noexcept {
41 return *pipeline_layout;
42 }
43
44private:
45 VKUpdateDescriptorQueue* update_descriptor_queue;
46 Shader::Info info;
47
48 vk::ShaderModule spv_module;
49 vk::DescriptorSetLayout descriptor_set_layout;
50 DescriptorAllocator descriptor_allocator;
51 vk::PipelineLayout pipeline_layout;
52 vk::DescriptorUpdateTemplateKHR descriptor_update_template;
53 vk::Pipeline pipeline;
21}; 54};
22 55
23} // namespace Vulkan 56} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
index ef9fb5910..3bea1ff44 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -19,9 +19,7 @@ constexpr std::size_t SETS_GROW_RATE = 0x20;
19DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_, 19DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_,
20 VkDescriptorSetLayout layout_) 20 VkDescriptorSetLayout layout_)
21 : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE), 21 : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE),
22 descriptor_pool{descriptor_pool_}, layout{layout_} {} 22 descriptor_pool{&descriptor_pool_}, layout{layout_} {}
23
24DescriptorAllocator::~DescriptorAllocator() = default;
25 23
26VkDescriptorSet DescriptorAllocator::Commit() { 24VkDescriptorSet DescriptorAllocator::Commit() {
27 const std::size_t index = CommitResource(); 25 const std::size_t index = CommitResource();
@@ -29,7 +27,7 @@ VkDescriptorSet DescriptorAllocator::Commit() {
29} 27}
30 28
31void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { 29void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) {
32 descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); 30 descriptors_allocations.push_back(descriptor_pool->AllocateDescriptors(layout, end - begin));
33} 31}
34 32
35VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler) 33VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler)
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
index f892be7be..2501f9967 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
@@ -17,8 +17,12 @@ class VKScheduler;
17 17
18class DescriptorAllocator final : public ResourcePool { 18class DescriptorAllocator final : public ResourcePool {
19public: 19public:
20 explicit DescriptorAllocator() = default;
20 explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); 21 explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout);
21 ~DescriptorAllocator() override; 22 ~DescriptorAllocator() override = default;
23
24 DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default;
25 DescriptorAllocator(DescriptorAllocator&&) noexcept = default;
22 26
23 DescriptorAllocator& operator=(const DescriptorAllocator&) = delete; 27 DescriptorAllocator& operator=(const DescriptorAllocator&) = delete;
24 DescriptorAllocator(const DescriptorAllocator&) = delete; 28 DescriptorAllocator(const DescriptorAllocator&) = delete;
@@ -29,8 +33,8 @@ protected:
29 void Allocate(std::size_t begin, std::size_t end) override; 33 void Allocate(std::size_t begin, std::size_t end) override;
30 34
31private: 35private:
32 VKDescriptorPool& descriptor_pool; 36 VKDescriptorPool* descriptor_pool{};
33 const VkDescriptorSetLayout layout; 37 VkDescriptorSetLayout layout{};
34 38
35 std::vector<vk::DescriptorSets> descriptors_allocations; 39 std::vector<vk::DescriptorSets> descriptors_allocations;
36}; 40};
diff --git a/src/video_core/renderer_vulkan/vk_pipeline.h b/src/video_core/renderer_vulkan/vk_pipeline.h
new file mode 100644
index 000000000..b06288403
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_pipeline.h
@@ -0,0 +1,36 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8
9#include "video_core/vulkan_common/vulkan_wrapper.h"
10
11namespace Vulkan {
12
13class Pipeline {
14public:
15 /// Add a reference count to the pipeline
16 void AddRef() noexcept {
17 ++ref_count;
18 }
19
20 [[nodiscard]] bool RemoveRef() noexcept {
21 --ref_count;
22 return ref_count == 0;
23 }
24
25 [[nodiscard]] u64 UsageTick() const noexcept {
26 return usage_tick;
27 }
28
29protected:
30 u64 usage_tick{};
31
32private:
33 size_t ref_count{};
34};
35
36} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 7d0ba1180..4bf3e4819 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -12,6 +12,8 @@
12#include "common/microprofile.h" 12#include "common/microprofile.h"
13#include "core/core.h" 13#include "core/core.h"
14#include "core/memory.h" 14#include "core/memory.h"
15#include "shader_recompiler/environment.h"
16#include "shader_recompiler/recompiler.h"
15#include "video_core/engines/kepler_compute.h" 17#include "video_core/engines/kepler_compute.h"
16#include "video_core/engines/maxwell_3d.h" 18#include "video_core/engines/maxwell_3d.h"
17#include "video_core/memory_manager.h" 19#include "video_core/memory_manager.h"
@@ -22,43 +24,105 @@
22#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 24#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
23#include "video_core/renderer_vulkan/vk_rasterizer.h" 25#include "video_core/renderer_vulkan/vk_rasterizer.h"
24#include "video_core/renderer_vulkan/vk_scheduler.h" 26#include "video_core/renderer_vulkan/vk_scheduler.h"
27#include "video_core/renderer_vulkan/vk_shader_util.h"
25#include "video_core/renderer_vulkan/vk_update_descriptor.h" 28#include "video_core/renderer_vulkan/vk_update_descriptor.h"
26#include "video_core/shader_cache.h" 29#include "video_core/shader_cache.h"
27#include "video_core/shader_notify.h" 30#include "video_core/shader_notify.h"
28#include "video_core/vulkan_common/vulkan_device.h" 31#include "video_core/vulkan_common/vulkan_device.h"
29#include "video_core/vulkan_common/vulkan_wrapper.h" 32#include "video_core/vulkan_common/vulkan_wrapper.h"
30 33
34#pragma optimize("", off)
35
31namespace Vulkan { 36namespace Vulkan {
32MICROPROFILE_DECLARE(Vulkan_PipelineCache); 37MICROPROFILE_DECLARE(Vulkan_PipelineCache);
33 38
34using Tegra::Engines::ShaderType; 39using Tegra::Engines::ShaderType;
35 40
36namespace { 41namespace {
37size_t StageFromProgram(size_t program) { 42class Environment final : public Shader::Environment {
38 return program == 0 ? 0 : program - 1; 43public:
39} 44 explicit Environment(Tegra::Engines::KeplerCompute& kepler_compute_,
45 Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_)
46 : kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, program_base{program_base_} {}
47
48 ~Environment() override = default;
49
50 [[nodiscard]] std::optional<u128> Analyze(u32 start_address) {
51 const std::optional<u64> size{TryFindSize(start_address)};
52 if (!size) {
53 return std::nullopt;
54 }
55 cached_lowest = start_address;
56 cached_highest = start_address + static_cast<u32>(*size);
57 return Common::CityHash128(reinterpret_cast<const char*>(code.data()), code.size());
58 }
40 59
41ShaderType StageFromProgram(Maxwell::ShaderProgram program) { 60 [[nodiscard]] size_t ShaderSize() const noexcept {
42 return static_cast<ShaderType>(StageFromProgram(static_cast<size_t>(program))); 61 return read_highest - read_lowest + INST_SIZE;
43} 62 }
44 63
45ShaderType GetShaderType(Maxwell::ShaderProgram program) { 64 [[nodiscard]] u128 ComputeHash() const {
46 switch (program) { 65 const size_t size{ShaderSize()};
47 case Maxwell::ShaderProgram::VertexB: 66 auto data = std::make_unique<u64[]>(size);
48 return ShaderType::Vertex; 67 gpu_memory.ReadBlock(program_base + read_lowest, data.get(), size);
49 case Maxwell::ShaderProgram::TesselationControl: 68 return Common::CityHash128(reinterpret_cast<const char*>(data.get()), size);
50 return ShaderType::TesselationControl;
51 case Maxwell::ShaderProgram::TesselationEval:
52 return ShaderType::TesselationEval;
53 case Maxwell::ShaderProgram::Geometry:
54 return ShaderType::Geometry;
55 case Maxwell::ShaderProgram::Fragment:
56 return ShaderType::Fragment;
57 default:
58 UNIMPLEMENTED_MSG("program={}", program);
59 return ShaderType::Vertex;
60 } 69 }
61} 70
71 u64 ReadInstruction(u32 address) override {
72 read_lowest = std::min(read_lowest, address);
73 read_highest = std::max(read_highest, address);
74
75 if (address >= cached_lowest && address < cached_highest) {
76 return code[address / INST_SIZE];
77 }
78 return gpu_memory.Read<u64>(program_base + address);
79 }
80
81 std::array<u32, 3> WorkgroupSize() override {
82 const auto& qmd{kepler_compute.launch_description};
83 return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
84 }
85
86private:
87 static constexpr size_t INST_SIZE = sizeof(u64);
88 static constexpr size_t BLOCK_SIZE = 0x1000;
89 static constexpr size_t MAXIMUM_SIZE = 0x100000;
90
91 static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
92 static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
93
94 std::optional<u64> TryFindSize(u32 start_address) {
95 GPUVAddr guest_addr = program_base + start_address;
96 size_t offset = 0;
97 size_t size = BLOCK_SIZE;
98 while (size <= MAXIMUM_SIZE) {
99 code.resize(size / INST_SIZE);
100 u64* const data = code.data() + offset / INST_SIZE;
101 gpu_memory.ReadBlock(guest_addr, data, BLOCK_SIZE);
102 for (size_t i = 0; i < BLOCK_SIZE; i += INST_SIZE) {
103 const u64 inst = data[i / INST_SIZE];
104 if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
105 return offset + i;
106 }
107 }
108 guest_addr += BLOCK_SIZE;
109 size += BLOCK_SIZE;
110 offset += BLOCK_SIZE;
111 }
112 return std::nullopt;
113 }
114
115 Tegra::Engines::KeplerCompute& kepler_compute;
116 Tegra::MemoryManager& gpu_memory;
117 GPUVAddr program_base;
118
119 u32 read_lowest = 0;
120 u32 read_highest = 0;
121
122 std::vector<u64> code;
123 u32 cached_lowest = std::numeric_limits<u32>::max();
124 u32 cached_highest = 0;
125};
62} // Anonymous namespace 126} // Anonymous namespace
63 127
64size_t ComputePipelineCacheKey::Hash() const noexcept { 128size_t ComputePipelineCacheKey::Hash() const noexcept {
@@ -70,35 +134,91 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
70 return std::memcmp(&rhs, this, sizeof *this) == 0; 134 return std::memcmp(&rhs, this, sizeof *this) == 0;
71} 135}
72 136
73Shader::Shader() = default;
74
75Shader::~Shader() = default;
76
77PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, 137PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
78 Tegra::Engines::Maxwell3D& maxwell3d_, 138 Tegra::Engines::Maxwell3D& maxwell3d_,
79 Tegra::Engines::KeplerCompute& kepler_compute_, 139 Tegra::Engines::KeplerCompute& kepler_compute_,
80 Tegra::MemoryManager& gpu_memory_, const Device& device_, 140 Tegra::MemoryManager& gpu_memory_, const Device& device_,
81 VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, 141 VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
82 VKUpdateDescriptorQueue& update_descriptor_queue_) 142 VKUpdateDescriptorQueue& update_descriptor_queue_)
83 : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, 143 : VideoCommon::ShaderCache<ShaderInfo>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
84 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, 144 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
85 scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ 145 scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{
86 update_descriptor_queue_} {} 146 update_descriptor_queue_} {}
87 147
88PipelineCache::~PipelineCache() = default; 148PipelineCache::~PipelineCache() = default;
89 149
90ComputePipeline& PipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { 150ComputePipeline* PipelineCache::CurrentComputePipeline() {
91 MICROPROFILE_SCOPE(Vulkan_PipelineCache); 151 MICROPROFILE_SCOPE(Vulkan_PipelineCache);
92 152
93 const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); 153 const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
94 auto& entry = pair->second; 154 const auto& qmd{kepler_compute.launch_description};
95 if (!is_cache_miss) { 155 const GPUVAddr shader_addr{program_base + qmd.program_start};
96 return *entry; 156 const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
157 if (!cpu_shader_addr) {
158 return nullptr;
159 }
160 ShaderInfo* const shader{TryGet(*cpu_shader_addr)};
161 if (!shader) {
162 return CreateComputePipelineWithoutShader(*cpu_shader_addr);
163 }
164 const ComputePipelineCacheKey key{MakeComputePipelineKey(shader->unique_hash)};
165 const auto [pair, is_new]{compute_cache.try_emplace(key)};
166 auto& pipeline{pair->second};
167 if (!is_new) {
168 return &pipeline;
169 }
170 pipeline = CreateComputePipeline(shader);
171 shader->compute_users.push_back(key);
172 return &pipeline;
173}
174
175ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) {
176 const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
177 const auto& qmd{kepler_compute.launch_description};
178 Environment env{kepler_compute, gpu_memory, program_base};
179 if (const std::optional<u128> cached_hash{env.Analyze(qmd.program_start)}) {
180 // TODO: Load from cache
97 } 181 }
98 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); 182 const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)};
99 throw "Bad"; 183 shader_info->unique_hash = env.ComputeHash();
184 shader_info->size_bytes = env.ShaderSize();
185 return ComputePipeline{device, descriptor_pool, update_descriptor_queue, info,
186 BuildShader(device, code)};
100} 187}
101 188
102void PipelineCache::OnShaderRemoval(Shader*) {} 189ComputePipeline* PipelineCache::CreateComputePipelineWithoutShader(VAddr shader_cpu_addr) {
190 ShaderInfo shader;
191 ComputePipeline pipeline{CreateComputePipeline(&shader)};
192 const ComputePipelineCacheKey key{MakeComputePipelineKey(shader.unique_hash)};
193 shader.compute_users.push_back(key);
194 pipeline.AddRef();
195
196 const size_t size_bytes{shader.size_bytes};
197 Register(std::make_unique<ShaderInfo>(std::move(shader)), shader_cpu_addr, size_bytes);
198 return &compute_cache.emplace(key, std::move(pipeline)).first->second;
199}
200
201ComputePipelineCacheKey PipelineCache::MakeComputePipelineKey(u128 unique_hash) const {
202 const auto& qmd{kepler_compute.launch_description};
203 return {
204 .unique_hash = unique_hash,
205 .shared_memory_size = qmd.shared_alloc,
206 .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
207 };
208}
209
210void PipelineCache::OnShaderRemoval(ShaderInfo* shader) {
211 for (const ComputePipelineCacheKey& key : shader->compute_users) {
212 const auto it = compute_cache.find(key);
213 ASSERT(it != compute_cache.end());
214
215 Pipeline& pipeline = it->second;
216 if (pipeline.RemoveRef()) {
217 // Wait for the pipeline to be free of GPU usage before destroying it
218 scheduler.Wait(pipeline.UsageTick());
219 compute_cache.erase(it);
220 }
221 }
222}
103 223
104} // namespace Vulkan 224} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index e3e63340d..eb35abc27 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -36,7 +36,7 @@ class VKUpdateDescriptorQueue;
36using Maxwell = Tegra::Engines::Maxwell3D::Regs; 36using Maxwell = Tegra::Engines::Maxwell3D::Regs;
37 37
38struct ComputePipelineCacheKey { 38struct ComputePipelineCacheKey {
39 GPUVAddr shader; 39 u128 unique_hash;
40 u32 shared_memory_size; 40 u32 shared_memory_size;
41 std::array<u32, 3> workgroup_size; 41 std::array<u32, 3> workgroup_size;
42 42
@@ -67,13 +67,13 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
67 67
68namespace Vulkan { 68namespace Vulkan {
69 69
70class Shader { 70struct ShaderInfo {
71public: 71 u128 unique_hash{};
72 explicit Shader(); 72 size_t size_bytes{};
73 ~Shader(); 73 std::vector<ComputePipelineCacheKey> compute_users;
74}; 74};
75 75
76class PipelineCache final : public VideoCommon::ShaderCache<Shader> { 76class PipelineCache final : public VideoCommon::ShaderCache<ShaderInfo> {
77public: 77public:
78 explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, 78 explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
79 Tegra::Engines::Maxwell3D& maxwell3d, 79 Tegra::Engines::Maxwell3D& maxwell3d,
@@ -83,12 +83,18 @@ public:
83 VKUpdateDescriptorQueue& update_descriptor_queue); 83 VKUpdateDescriptorQueue& update_descriptor_queue);
84 ~PipelineCache() override; 84 ~PipelineCache() override;
85 85
86 ComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); 86 [[nodiscard]] ComputePipeline* CurrentComputePipeline();
87 87
88protected: 88protected:
89 void OnShaderRemoval(Shader* shader) final; 89 void OnShaderRemoval(ShaderInfo* shader) override;
90 90
91private: 91private:
92 ComputePipeline CreateComputePipeline(ShaderInfo* shader);
93
94 ComputePipeline* CreateComputePipelineWithoutShader(VAddr shader_cpu_addr);
95
96 ComputePipelineCacheKey MakeComputePipelineKey(u128 unique_hash) const;
97
92 Tegra::GPU& gpu; 98 Tegra::GPU& gpu;
93 Tegra::Engines::Maxwell3D& maxwell3d; 99 Tegra::Engines::Maxwell3D& maxwell3d;
94 Tegra::Engines::KeplerCompute& kepler_compute; 100 Tegra::Engines::KeplerCompute& kepler_compute;
@@ -99,13 +105,7 @@ private:
99 VKDescriptorPool& descriptor_pool; 105 VKDescriptorPool& descriptor_pool;
100 VKUpdateDescriptorQueue& update_descriptor_queue; 106 VKUpdateDescriptorQueue& update_descriptor_queue;
101 107
102 std::unique_ptr<Shader> null_shader; 108 std::unordered_map<ComputePipelineCacheKey, ComputePipeline> compute_cache;
103 std::unique_ptr<Shader> null_kernel;
104
105 std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
106
107 std::mutex pipeline_cache;
108 std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
109}; 109};
110 110
111} // namespace Vulkan 111} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f152297d9..b757454c4 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -36,6 +36,8 @@
36#include "video_core/vulkan_common/vulkan_device.h" 36#include "video_core/vulkan_common/vulkan_device.h"
37#include "video_core/vulkan_common/vulkan_wrapper.h" 37#include "video_core/vulkan_common/vulkan_wrapper.h"
38 38
39#pragma optimize("", off)
40
39namespace Vulkan { 41namespace Vulkan {
40 42
41using Maxwell = Tegra::Engines::Maxwell3D::Regs; 43using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -237,7 +239,26 @@ void RasterizerVulkan::Clear() {
237} 239}
238 240
239void RasterizerVulkan::DispatchCompute() { 241void RasterizerVulkan::DispatchCompute() {
240 UNREACHABLE_MSG("Not implemented"); 242 ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()};
243 if (!pipeline) {
244 return;
245 }
246 std::scoped_lock lock{buffer_cache.mutex};
247 update_descriptor_queue.Acquire();
248 pipeline->ConfigureBufferCache(buffer_cache);
249 const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()};
250
251 const auto& qmd{kepler_compute.launch_description};
252 const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
253 const VkPipeline pipeline_handle{pipeline->Handle()};
254 const VkPipelineLayout pipeline_layout{pipeline->PipelineLayout()};
255 scheduler.Record(
256 [pipeline_handle, pipeline_layout, dim, descriptor_set](vk::CommandBuffer cmdbuf) {
257 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle);
258 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0,
259 descriptor_set, nullptr);
260 cmdbuf.Dispatch(dim[0], dim[1], dim[2]);
261 });
241} 262}
242 263
243void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { 264void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 31017dc2b..3fd03b915 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -21,7 +21,6 @@
21#include "video_core/renderer_vulkan/vk_buffer_cache.h" 21#include "video_core/renderer_vulkan/vk_buffer_cache.h"
22#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 22#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
23#include "video_core/renderer_vulkan/vk_fence_manager.h" 23#include "video_core/renderer_vulkan/vk_fence_manager.h"
24#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
25#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 24#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
26#include "video_core/renderer_vulkan/vk_query_cache.h" 25#include "video_core/renderer_vulkan/vk_query_cache.h"
27#include "video_core/renderer_vulkan/vk_scheduler.h" 26#include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -150,8 +149,6 @@ private:
150 BlitImageHelper blit_image; 149 BlitImageHelper blit_image;
151 ASTCDecoderPass astc_decoder_pass; 150 ASTCDecoderPass astc_decoder_pass;
152 151
153 GraphicsPipelineCacheKey graphics_key;
154
155 TextureCacheRuntime texture_cache_runtime; 152 TextureCacheRuntime texture_cache_runtime;
156 TextureCache texture_cache; 153 TextureCache texture_cache;
157 BufferCacheRuntime buffer_cache_runtime; 154 BufferCacheRuntime buffer_cache_runtime;
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
index a8bf7bda8..2dd514968 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
@@ -10,18 +10,16 @@
10namespace Vulkan { 10namespace Vulkan {
11 11
12ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_) 12ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_)
13 : master_semaphore{master_semaphore_}, grow_step{grow_step_} {} 13 : master_semaphore{&master_semaphore_}, grow_step{grow_step_} {}
14
15ResourcePool::~ResourcePool() = default;
16 14
17size_t ResourcePool::CommitResource() { 15size_t ResourcePool::CommitResource() {
18 // Refresh semaphore to query updated results 16 // Refresh semaphore to query updated results
19 master_semaphore.Refresh(); 17 master_semaphore->Refresh();
20 const u64 gpu_tick = master_semaphore.KnownGpuTick(); 18 const u64 gpu_tick = master_semaphore->KnownGpuTick();
21 const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> { 19 const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> {
22 for (size_t iterator = begin; iterator < end; ++iterator) { 20 for (size_t iterator = begin; iterator < end; ++iterator) {
23 if (gpu_tick >= ticks[iterator]) { 21 if (gpu_tick >= ticks[iterator]) {
24 ticks[iterator] = master_semaphore.CurrentTick(); 22 ticks[iterator] = master_semaphore->CurrentTick();
25 return iterator; 23 return iterator;
26 } 24 }
27 } 25 }
@@ -36,7 +34,7 @@ size_t ResourcePool::CommitResource() {
36 // Both searches failed, the pool is full; handle it. 34 // Both searches failed, the pool is full; handle it.
37 const size_t free_resource = ManageOverflow(); 35 const size_t free_resource = ManageOverflow();
38 36
39 ticks[free_resource] = master_semaphore.CurrentTick(); 37 ticks[free_resource] = master_semaphore->CurrentTick();
40 found = free_resource; 38 found = free_resource;
41 } 39 }
42 } 40 }
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h
index 9d0bb3b4d..f0b80ad59 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.h
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.h
@@ -18,8 +18,16 @@ class MasterSemaphore;
18 */ 18 */
19class ResourcePool { 19class ResourcePool {
20public: 20public:
21 explicit ResourcePool() = default;
21 explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step); 22 explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step);
22 virtual ~ResourcePool(); 23
24 virtual ~ResourcePool() = default;
25
26 ResourcePool& operator=(ResourcePool&&) noexcept = default;
27 ResourcePool(ResourcePool&&) noexcept = default;
28
29 ResourcePool& operator=(const ResourcePool&) = default;
30 ResourcePool(const ResourcePool&) = default;
23 31
24protected: 32protected:
25 size_t CommitResource(); 33 size_t CommitResource();
@@ -34,7 +42,7 @@ private:
34 /// Allocates a new page of resources. 42 /// Allocates a new page of resources.
35 void Grow(); 43 void Grow();
36 44
37 MasterSemaphore& master_semaphore; 45 MasterSemaphore* master_semaphore{};
38 size_t grow_step = 0; ///< Number of new resources created after an overflow 46 size_t grow_step = 0; ///< Number of new resources created after an overflow
39 size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found 47 size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found
40 std::vector<u64> ticks; ///< Ticks for each resource 48 std::vector<u64> ticks; ///< Ticks for each resource