summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.ci/scripts/common/post-upload.sh4
-rw-r--r--.ci/scripts/windows/upload.ps126
-rw-r--r--CONTRIBUTING.md2
-rw-r--r--externals/microprofile/microprofile.h6
-rw-r--r--src/common/common_funcs.h2
-rw-r--r--src/common/hash.h35
-rw-r--r--src/core/file_sys/xts_archive.cpp3
-rw-r--r--src/video_core/CMakeLists.txt6
-rw-r--r--src/video_core/engines/maxwell_3d.cpp3
-rw-r--r--src/video_core/engines/shader_bytecode.h22
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_device.h5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp19
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp76
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp49
-rw-r--r--src/video_core/shader/decode.cpp4
-rw-r--r--src/video_core/shader/decode/warp.cpp79
-rw-r--r--src/video_core/shader/node.h12
-rw-r--r--src/video_core/shader/shader_ir.h6
-rw-r--r--src/video_core/textures/astc.cpp73
-rw-r--r--src/video_core/textures/texture.h7
-rw-r--r--src/video_core/video_core.cpp2
24 files changed, 227 insertions, 224 deletions
diff --git a/.ci/scripts/common/post-upload.sh b/.ci/scripts/common/post-upload.sh
index b80868635..e46ee0abb 100644
--- a/.ci/scripts/common/post-upload.sh
+++ b/.ci/scripts/common/post-upload.sh
@@ -4,9 +4,13 @@
4cp license.txt "$DIR_NAME" 4cp license.txt "$DIR_NAME"
5cp README.md "$DIR_NAME" 5cp README.md "$DIR_NAME"
6 6
7tar -cJvf "${REV_NAME}-source.tar.xz" src externals CMakeLists.txt README.md license.txt
8cp "${REV_NAME}-source.tar.xz" "$DIR_NAME"
9
7tar $COMPRESSION_FLAGS "$ARCHIVE_NAME" "$DIR_NAME" 10tar $COMPRESSION_FLAGS "$ARCHIVE_NAME" "$DIR_NAME"
8 11
9mv "$DIR_NAME" $RELEASE_NAME 12mv "$DIR_NAME" $RELEASE_NAME
13mv "${REV_NAME}-source.tar.xz" $RELEASE_NAME
10 14
117z a "$REV_NAME.7z" $RELEASE_NAME 157z a "$REV_NAME.7z" $RELEASE_NAME
12 16
diff --git a/.ci/scripts/windows/upload.ps1 b/.ci/scripts/windows/upload.ps1
index e34e6681f..b9b8b4af8 100644
--- a/.ci/scripts/windows/upload.ps1
+++ b/.ci/scripts/windows/upload.ps1
@@ -1,11 +1,12 @@
1param($BUILD_NAME) 1param($BUILD_NAME)
2 2
3$GITDATE = $(git show -s --date=short --format='%ad') -replace "-","" 3$GITDATE = $(git show -s --date=short --format='%ad') -replace "-", ""
4$GITREV = $(git show -s --format='%h') 4$GITREV = $(git show -s --format='%h')
5 5
6if ("$BUILD_NAME" -eq "mainline") { 6if ("$BUILD_NAME" -eq "mainline") {
7 $RELEASE_DIST = "yuzu-windows-msvc" 7 $RELEASE_DIST = "yuzu-windows-msvc"
8} else { 8}
9else {
9 $RELEASE_DIST = "yuzu-windows-msvc-$BUILD_NAME" 10 $RELEASE_DIST = "yuzu-windows-msvc-$BUILD_NAME"
10} 11}
11 12
@@ -14,6 +15,9 @@ $MSVC_BUILD_PDB = "yuzu-windows-msvc-$GITDATE-$GITREV-debugsymbols.zip" -replace
14$MSVC_SEVENZIP = "yuzu-windows-msvc-$GITDATE-$GITREV.7z" -replace " ", "" 15$MSVC_SEVENZIP = "yuzu-windows-msvc-$GITDATE-$GITREV.7z" -replace " ", ""
15$MSVC_TAR = "yuzu-windows-msvc-$GITDATE-$GITREV.tar" -replace " ", "" 16$MSVC_TAR = "yuzu-windows-msvc-$GITDATE-$GITREV.tar" -replace " ", ""
16$MSVC_TARXZ = "yuzu-windows-msvc-$GITDATE-$GITREV.tar.xz" -replace " ", "" 17$MSVC_TARXZ = "yuzu-windows-msvc-$GITDATE-$GITREV.tar.xz" -replace " ", ""
18$MSVC_SOURCE = "yuzu-windows-msvc-source-$GITDATE-$GITREV" -replace " ", ""
19$MSVC_SOURCE_TAR = "$MSVC_SOURCE.tar"
20$MSVC_SOURCE_TARXZ = "$MSVC_SOURCE_TAR.xz"
17 21
18$env:BUILD_ZIP = $MSVC_BUILD_ZIP 22$env:BUILD_ZIP = $MSVC_BUILD_ZIP
19$env:BUILD_SYMBOLS = $MSVC_BUILD_PDB 23$env:BUILD_SYMBOLS = $MSVC_BUILD_PDB
@@ -21,19 +25,33 @@ $env:BUILD_UPDATE = $MSVC_SEVENZIP
21 25
22$BUILD_DIR = ".\build\bin\Release" 26$BUILD_DIR = ".\build\bin\Release"
23 27
28# Upload debugging symbols
24mkdir pdb 29mkdir pdb
25Get-ChildItem "$BUILD_DIR\" -Recurse -Filter "*.pdb" | Copy-Item -destination .\pdb 30Get-ChildItem "$BUILD_DIR\" -Recurse -Filter "*.pdb" | Copy-Item -destination .\pdb
267z a -tzip $MSVC_BUILD_PDB .\pdb\*.pdb 317z a -tzip $MSVC_BUILD_PDB .\pdb\*.pdb
27rm "$BUILD_DIR\*.pdb" 32rm "$BUILD_DIR\*.pdb"
33
34# Create artifact directories
28mkdir $RELEASE_DIST 35mkdir $RELEASE_DIST
36mkdir $MSVC_SOURCE
29mkdir "artifacts" 37mkdir "artifacts"
30 38
39# Build a tar.xz for the source of the release
40Copy-Item .\license.txt -Destination $MSVC_SOURCE
41Copy-Item .\README.md -Destination $MSVC_SOURCE
42Copy-Item .\src -Recurse -Destination $MSVC_SOURCE
43Copy-Item .\externals -Recurse -Destination $MSVC_SOURCE
44Copy-Item .\dist -Recurse -Destination $MSVC_SOURCE
45Copy-Item .\CMakeModules -Recurse -Destination $MSVC_SOURCE
467z a -r -ttar $MSVC_SOURCE_TAR $MSVC_SOURCE
477z a -r -txz $MSVC_SOURCE_TARXZ $MSVC_SOURCE_TAR
48
49# Build the final release artifacts
50Copy-Item $MSVC_SOURCE_TARXZ -Destination $RELEASE_DIST
31Copy-Item "$BUILD_DIR\*" -Destination $RELEASE_DIST -Recurse 51Copy-Item "$BUILD_DIR\*" -Destination $RELEASE_DIST -Recurse
32rm "$RELEASE_DIST\*.exe" 52rm "$RELEASE_DIST\*.exe"
33Get-ChildItem "$BUILD_DIR" -Recurse -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST 53Get-ChildItem "$BUILD_DIR" -Recurse -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST
34Get-ChildItem "$BUILD_DIR" -Recurse -Filter "QtWebEngineProcess*.exe" | Copy-Item -destination $RELEASE_DIST 54Get-ChildItem "$BUILD_DIR" -Recurse -Filter "QtWebEngineProcess*.exe" | Copy-Item -destination $RELEASE_DIST
35Copy-Item .\license.txt -Destination $RELEASE_DIST
36Copy-Item .\README.md -Destination $RELEASE_DIST
377z a -tzip $MSVC_BUILD_ZIP $RELEASE_DIST\* 557z a -tzip $MSVC_BUILD_ZIP $RELEASE_DIST\*
387z a $MSVC_SEVENZIP $RELEASE_DIST 567z a $MSVC_SEVENZIP $RELEASE_DIST
39 57
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index ccb8fc5d7..5d4b6f9da 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1 +1 @@
**The Contributor's Guide has moved to [the Yuzu wiki](https://github.com/yuzu-emu/yuzu/wiki/Contributing).** **The Contributor's Guide has moved to [the yuzu wiki](https://github.com/yuzu-emu/yuzu/wiki/Contributing).**
diff --git a/externals/microprofile/microprofile.h b/externals/microprofile/microprofile.h
index 384863ccc..cdb312b87 100644
--- a/externals/microprofile/microprofile.h
+++ b/externals/microprofile/microprofile.h
@@ -814,7 +814,7 @@ struct MicroProfile
814 814
815inline int MicroProfileLogType(MicroProfileLogEntry Index) 815inline int MicroProfileLogType(MicroProfileLogEntry Index)
816{ 816{
817 return ((MP_LOG_BEGIN_MASK & Index)>>62) & 0x3; 817 return (int)(((MP_LOG_BEGIN_MASK & Index)>>62) & 0x3ULL);
818} 818}
819 819
820inline uint64_t MicroProfileLogTimerIndex(MicroProfileLogEntry Index) 820inline uint64_t MicroProfileLogTimerIndex(MicroProfileLogEntry Index)
@@ -861,12 +861,12 @@ T MicroProfileMax(T a, T b)
861 861
862inline int64_t MicroProfileMsToTick(float fMs, int64_t nTicksPerSecond) 862inline int64_t MicroProfileMsToTick(float fMs, int64_t nTicksPerSecond)
863{ 863{
864 return (int64_t)(fMs*0.001f*nTicksPerSecond); 864 return (int64_t)(fMs*0.001f*(float)nTicksPerSecond);
865} 865}
866 866
867inline float MicroProfileTickToMsMultiplier(int64_t nTicksPerSecond) 867inline float MicroProfileTickToMsMultiplier(int64_t nTicksPerSecond)
868{ 868{
869 return 1000.f / nTicksPerSecond; 869 return 1000.f / (float)nTicksPerSecond;
870} 870}
871 871
872inline uint16_t MicroProfileGetGroupIndex(MicroProfileToken t) 872inline uint16_t MicroProfileGetGroupIndex(MicroProfileToken t)
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h
index c029dc7b3..6dc3e108f 100644
--- a/src/common/common_funcs.h
+++ b/src/common/common_funcs.h
@@ -56,7 +56,7 @@ std::string GetLastErrorMsg();
56namespace Common { 56namespace Common {
57 57
58constexpr u32 MakeMagic(char a, char b, char c, char d) { 58constexpr u32 MakeMagic(char a, char b, char c, char d) {
59 return a | b << 8 | c << 16 | d << 24; 59 return u32(a) | u32(b) << 8 | u32(c) << 16 | u32(d) << 24;
60} 60}
61 61
62} // namespace Common 62} // namespace Common
diff --git a/src/common/hash.h b/src/common/hash.h
index ebd4125e2..b2538f3ea 100644
--- a/src/common/hash.h
+++ b/src/common/hash.h
@@ -35,41 +35,6 @@ static inline u64 ComputeStructHash64(const T& data) {
35 return ComputeHash64(&data, sizeof(data)); 35 return ComputeHash64(&data, sizeof(data));
36} 36}
37 37
38/// A helper template that ensures the padding in a struct is initialized by memsetting to 0.
39template <typename T>
40struct HashableStruct {
41 // In addition to being trivially copyable, T must also have a trivial default constructor,
42 // because any member initialization would be overridden by memset
43 static_assert(std::is_trivial_v<T>, "Type passed to HashableStruct must be trivial");
44 /*
45 * We use a union because "implicitly-defined copy/move constructor for a union X copies the
46 * object representation of X." and "implicitly-defined copy assignment operator for a union X
47 * copies the object representation (3.9) of X." = Bytewise copy instead of memberwise copy.
48 * This is important because the padding bytes are included in the hash and comparison between
49 * objects.
50 */
51 union {
52 T state;
53 };
54
55 HashableStruct() {
56 // Memset structure to zero padding bits, so that they will be deterministic when hashing
57 std::memset(&state, 0, sizeof(T));
58 }
59
60 bool operator==(const HashableStruct<T>& o) const {
61 return std::memcmp(&state, &o.state, sizeof(T)) == 0;
62 };
63
64 bool operator!=(const HashableStruct<T>& o) const {
65 return !(*this == o);
66 };
67
68 std::size_t Hash() const {
69 return Common::ComputeStructHash64(state);
70 }
71};
72
73struct PairHash { 38struct PairHash {
74 template <class T1, class T2> 39 template <class T1, class T2>
75 std::size_t operator()(const std::pair<T1, T2>& pair) const noexcept { 40 std::size_t operator()(const std::pair<T1, T2>& pair) const noexcept {
diff --git a/src/core/file_sys/xts_archive.cpp b/src/core/file_sys/xts_archive.cpp
index 4bc5cb2ee..f5f8b91c9 100644
--- a/src/core/file_sys/xts_archive.cpp
+++ b/src/core/file_sys/xts_archive.cpp
@@ -93,8 +93,7 @@ Loader::ResultStatus NAX::Parse(std::string_view path) {
93 std::size_t i = 0; 93 std::size_t i = 0;
94 for (; i < sd_keys.size(); ++i) { 94 for (; i < sd_keys.size(); ++i) {
95 std::array<Core::Crypto::Key128, 2> nax_keys{}; 95 std::array<Core::Crypto::Key128, 2> nax_keys{};
96 if (!CalculateHMAC256(nax_keys.data(), sd_keys[i].data(), 0x10, std::string(path).c_str(), 96 if (!CalculateHMAC256(nax_keys.data(), sd_keys[i].data(), 0x10, path.data(), path.size())) {
97 path.size())) {
98 return Loader::ResultStatus::ErrorNAXKeyHMACFailed; 97 return Loader::ResultStatus::ErrorNAXKeyHMACFailed;
99 } 98 }
100 99
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index c911c6ec4..45d8eaf23 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -180,3 +180,9 @@ target_link_libraries(video_core PRIVATE glad)
180if (ENABLE_VULKAN) 180if (ENABLE_VULKAN)
181 target_link_libraries(video_core PRIVATE sirit) 181 target_link_libraries(video_core PRIVATE sirit)
182endif() 182endif()
183
184if (MSVC)
185 target_compile_options(video_core PRIVATE /we4267)
186else()
187 target_compile_options(video_core PRIVATE -Werror=conversion -Wno-error=sign-conversion)
188endif()
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 2bed6cb38..42ce49a4d 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -261,7 +261,8 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3
261 executing_macro = 0; 261 executing_macro = 0;
262 262
263 // Lookup the macro offset 263 // Lookup the macro offset
264 const u32 entry = ((method - MacroRegistersStart) >> 1) % macro_positions.size(); 264 const u32 entry =
265 ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());
265 266
266 // Execute the current macro. 267 // Execute the current macro.
267 macro_interpreter.Execute(macro_positions[entry], num_parameters, parameters); 268 macro_interpreter.Execute(macro_positions[entry], num_parameters, parameters);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 8f6bc76eb..9fafed4a2 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -616,6 +616,14 @@ union Instruction {
616 } shfl; 616 } shfl;
617 617
618 union { 618 union {
619 BitField<44, 1, u64> ftz;
620 BitField<39, 2, u64> tab5cb8_2;
621 BitField<38, 1, u64> ndv;
622 BitField<47, 1, u64> cc;
623 BitField<28, 8, u64> swizzle;
624 } fswzadd;
625
626 union {
619 BitField<8, 8, Register> gpr; 627 BitField<8, 8, Register> gpr;
620 BitField<20, 24, s64> offset; 628 BitField<20, 24, s64> offset;
621 } gmem; 629 } gmem;
@@ -1478,7 +1486,8 @@ union Instruction {
1478 u32 value = static_cast<u32>(target); 1486 u32 value = static_cast<u32>(target);
1479 // The branch offset is relative to the next instruction and is stored in bytes, so 1487 // The branch offset is relative to the next instruction and is stored in bytes, so
1480 // divide it by the size of an instruction and add 1 to it. 1488 // divide it by the size of an instruction and add 1 to it.
1481 return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1; 1489 return static_cast<s32>((value ^ mask) - mask) / static_cast<s32>(sizeof(Instruction)) +
1490 1;
1482 } 1491 }
1483 } bra; 1492 } bra;
1484 1493
@@ -1492,7 +1501,8 @@ union Instruction {
1492 u32 value = static_cast<u32>(target); 1501 u32 value = static_cast<u32>(target);
1493 // The branch offset is relative to the next instruction and is stored in bytes, so 1502 // The branch offset is relative to the next instruction and is stored in bytes, so
1494 // divide it by the size of an instruction and add 1 to it. 1503 // divide it by the size of an instruction and add 1 to it.
1495 return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1; 1504 return static_cast<s32>((value ^ mask) - mask) / static_cast<s32>(sizeof(Instruction)) +
1505 1;
1496 } 1506 }
1497 } brx; 1507 } brx;
1498 1508
@@ -1590,6 +1600,7 @@ public:
1590 DEPBAR, 1600 DEPBAR,
1591 VOTE, 1601 VOTE,
1592 SHFL, 1602 SHFL,
1603 FSWZADD,
1593 BFE_C, 1604 BFE_C,
1594 BFE_R, 1605 BFE_R,
1595 BFE_IMM, 1606 BFE_IMM,
@@ -1851,11 +1862,11 @@ private:
1851 const std::size_t bit_position = opcode_bitsize - i - 1; 1862 const std::size_t bit_position = opcode_bitsize - i - 1;
1852 switch (bitstring[i]) { 1863 switch (bitstring[i]) {
1853 case '0': 1864 case '0':
1854 mask |= 1 << bit_position; 1865 mask |= static_cast<u16>(1U << bit_position);
1855 break; 1866 break;
1856 case '1': 1867 case '1':
1857 expect |= 1 << bit_position; 1868 expect |= static_cast<u16>(1U << bit_position);
1858 mask |= 1 << bit_position; 1869 mask |= static_cast<u16>(1U << bit_position);
1859 break; 1870 break;
1860 default: 1871 default:
1861 // Ignore 1872 // Ignore
@@ -1888,6 +1899,7 @@ private:
1888 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), 1899 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
1889 INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), 1900 INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
1890 INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), 1901 INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
1902 INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"),
1891 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), 1903 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
1892 INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), 1904 INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
1893 INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), 1905 INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index c65b24c69..b30d5be74 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -62,6 +62,7 @@ Device::Device() {
62 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); 62 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
63 has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && 63 has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
64 GLAD_GL_NV_shader_thread_shuffle; 64 GLAD_GL_NV_shader_thread_shuffle;
65 has_shader_ballot = GLAD_GL_ARB_shader_ballot;
65 has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; 66 has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
66 has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); 67 has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
67 has_variable_aoffi = TestVariableAoffi(); 68 has_variable_aoffi = TestVariableAoffi();
@@ -79,6 +80,7 @@ Device::Device(std::nullptr_t) {
79 max_vertex_attributes = 16; 80 max_vertex_attributes = 16;
80 max_varyings = 15; 81 max_varyings = 15;
81 has_warp_intrinsics = true; 82 has_warp_intrinsics = true;
83 has_shader_ballot = true;
82 has_vertex_viewport_layer = true; 84 has_vertex_viewport_layer = true;
83 has_image_load_formatted = true; 85 has_image_load_formatted = true;
84 has_variable_aoffi = true; 86 has_variable_aoffi = true;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index bf35bd0b6..6c86fe207 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -34,6 +34,10 @@ public:
34 return has_warp_intrinsics; 34 return has_warp_intrinsics;
35 } 35 }
36 36
37 bool HasShaderBallot() const {
38 return has_shader_ballot;
39 }
40
37 bool HasVertexViewportLayer() const { 41 bool HasVertexViewportLayer() const {
38 return has_vertex_viewport_layer; 42 return has_vertex_viewport_layer;
39 } 43 }
@@ -68,6 +72,7 @@ private:
68 u32 max_vertex_attributes{}; 72 u32 max_vertex_attributes{};
69 u32 max_varyings{}; 73 u32 max_varyings{};
70 bool has_warp_intrinsics{}; 74 bool has_warp_intrinsics{};
75 bool has_shader_ballot{};
71 bool has_vertex_viewport_layer{}; 76 bool has_vertex_viewport_layer{};
72 bool has_image_load_formatted{}; 77 bool has_image_load_formatted{};
73 bool has_variable_aoffi{}; 78 bool has_variable_aoffi{};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e560d70d5..e43ba9d6b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -375,7 +375,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
375 fbkey.color_attachments[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); 375 fbkey.color_attachments[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
376 fbkey.colors[index] = std::move(color_surface); 376 fbkey.colors[index] = std::move(color_surface);
377 } 377 }
378 fbkey.colors_count = regs.rt_control.count; 378 fbkey.colors_count = static_cast<u16>(regs.rt_control.count);
379 379
380 if (depth_surface) { 380 if (depth_surface) {
381 // Assume that a surface will be written to if it is used as a framebuffer, even if 381 // Assume that a surface will be written to if it is used as a framebuffer, even if
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 35e5214a5..04a239a39 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -275,16 +275,25 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy
275 std::string source = fmt::format(R"(// {} 275 std::string source = fmt::format(R"(// {}
276#version 430 core 276#version 430 core
277#extension GL_ARB_separate_shader_objects : enable 277#extension GL_ARB_separate_shader_objects : enable
278#extension GL_ARB_shader_viewport_layer_array : enable
279#extension GL_EXT_shader_image_load_formatted : enable
280#extension GL_NV_gpu_shader5 : enable
281#extension GL_NV_shader_thread_group : enable
282#extension GL_NV_shader_thread_shuffle : enable
283)", 278)",
284 GetShaderId(unique_identifier, program_type)); 279 GetShaderId(unique_identifier, program_type));
285 if (is_compute) { 280 if (is_compute) {
286 source += "#extension GL_ARB_compute_variable_group_size : require\n"; 281 source += "#extension GL_ARB_compute_variable_group_size : require\n";
287 } 282 }
283 if (device.HasShaderBallot()) {
284 source += "#extension GL_ARB_shader_ballot : require\n";
285 }
286 if (device.HasVertexViewportLayer()) {
287 source += "#extension GL_ARB_shader_viewport_layer_array : require\n";
288 }
289 if (device.HasImageLoadFormatted()) {
290 source += "#extension GL_EXT_shader_image_load_formatted : require\n";
291 }
292 if (device.HasWarpIntrinsics()) {
293 source += "#extension GL_NV_gpu_shader5 : require\n"
294 "#extension GL_NV_shader_thread_group : require\n"
295 "#extension GL_NV_shader_thread_shuffle : require\n";
296 }
288 source += '\n'; 297 source += '\n';
289 298
290 if (!is_compute) { 299 if (!is_compute) {
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 0ce59a852..e56ed51de 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1379,6 +1379,26 @@ private:
1379 return GenerateUnary(operation, "float", Type::Float, type); 1379 return GenerateUnary(operation, "float", Type::Float, type);
1380 } 1380 }
1381 1381
1382 Expression FSwizzleAdd(Operation operation) {
1383 const std::string op_a = VisitOperand(operation, 0).AsFloat();
1384 const std::string op_b = VisitOperand(operation, 1).AsFloat();
1385
1386 if (!device.HasShaderBallot()) {
1387 LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
1388 return {fmt::format("{} + {}", op_a, op_b), Type::Float};
1389 }
1390
1391 const std::string instr_mask = VisitOperand(operation, 2).AsUint();
1392 const std::string mask = code.GenerateTemporary();
1393 code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask,
1394 instr_mask);
1395
1396 const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask);
1397 const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask);
1398 return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b),
1399 Type::Float};
1400 }
1401
1382 Expression ICastFloat(Operation operation) { 1402 Expression ICastFloat(Operation operation) {
1383 return GenerateUnary(operation, "int", Type::Int, Type::Float); 1403 return GenerateUnary(operation, "int", Type::Int, Type::Float);
1384 } 1404 }
@@ -1942,34 +1962,24 @@ private:
1942 return Vote(operation, "allThreadsEqualNV"); 1962 return Vote(operation, "allThreadsEqualNV");
1943 } 1963 }
1944 1964
1945 template <const std::string_view& func> 1965 Expression ThreadId(Operation operation) {
1946 Expression Shuffle(Operation operation) { 1966 if (!device.HasShaderBallot()) {
1947 const std::string value = VisitOperand(operation, 0).AsFloat(); 1967 LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
1948 if (!device.HasWarpIntrinsics()) { 1968 return {"0U", Type::Uint};
1949 LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader");
1950 // On a "single-thread" device we are either on the same thread or out of bounds. Both
1951 // cases return the passed value.
1952 return {value, Type::Float};
1953 } 1969 }
1954 1970 return {"gl_SubGroupInvocationARB", Type::Uint};
1955 const std::string index = VisitOperand(operation, 1).AsUint();
1956 const std::string width = VisitOperand(operation, 2).AsUint();
1957 return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float};
1958 } 1971 }
1959 1972
1960 template <const std::string_view& func> 1973 Expression ShuffleIndexed(Operation operation) {
1961 Expression InRangeShuffle(Operation operation) { 1974 std::string value = VisitOperand(operation, 0).AsFloat();
1962 const std::string index = VisitOperand(operation, 0).AsUint(); 1975
1963 const std::string width = VisitOperand(operation, 1).AsUint(); 1976 if (!device.HasShaderBallot()) {
1964 if (!device.HasWarpIntrinsics()) { 1977 LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
1965 // On a "single-thread" device we are only in bounds when the requested index is 0. 1978 return {std::move(value), Type::Float};
1966 return {fmt::format("({} == 0U)", index), Type::Bool};
1967 } 1979 }
1968 1980
1969 const std::string in_range = code.GenerateTemporary(); 1981 const std::string index = VisitOperand(operation, 1).AsUint();
1970 code.AddLine("bool {};", in_range); 1982 return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
1971 code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range);
1972 return {in_range, Type::Bool};
1973 } 1983 }
1974 1984
1975 struct Func final { 1985 struct Func final {
@@ -1981,11 +1991,6 @@ private:
1981 static constexpr std::string_view Or = "Or"; 1991 static constexpr std::string_view Or = "Or";
1982 static constexpr std::string_view Xor = "Xor"; 1992 static constexpr std::string_view Xor = "Xor";
1983 static constexpr std::string_view Exchange = "Exchange"; 1993 static constexpr std::string_view Exchange = "Exchange";
1984
1985 static constexpr std::string_view ShuffleIndexed = "shuffleNV";
1986 static constexpr std::string_view ShuffleUp = "shuffleUpNV";
1987 static constexpr std::string_view ShuffleDown = "shuffleDownNV";
1988 static constexpr std::string_view ShuffleButterfly = "shuffleXorNV";
1989 }; 1994 };
1990 1995
1991 static constexpr std::array operation_decompilers = { 1996 static constexpr std::array operation_decompilers = {
@@ -2016,6 +2021,7 @@ private:
2016 &GLSLDecompiler::FTrunc, 2021 &GLSLDecompiler::FTrunc,
2017 &GLSLDecompiler::FCastInteger<Type::Int>, 2022 &GLSLDecompiler::FCastInteger<Type::Int>,
2018 &GLSLDecompiler::FCastInteger<Type::Uint>, 2023 &GLSLDecompiler::FCastInteger<Type::Uint>,
2024 &GLSLDecompiler::FSwizzleAdd,
2019 2025
2020 &GLSLDecompiler::Add<Type::Int>, 2026 &GLSLDecompiler::Add<Type::Int>,
2021 &GLSLDecompiler::Mul<Type::Int>, 2027 &GLSLDecompiler::Mul<Type::Int>,
@@ -2151,15 +2157,8 @@ private:
2151 &GLSLDecompiler::VoteAny, 2157 &GLSLDecompiler::VoteAny,
2152 &GLSLDecompiler::VoteEqual, 2158 &GLSLDecompiler::VoteEqual,
2153 2159
2154 &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>, 2160 &GLSLDecompiler::ThreadId,
2155 &GLSLDecompiler::Shuffle<Func::ShuffleUp>, 2161 &GLSLDecompiler::ShuffleIndexed,
2156 &GLSLDecompiler::Shuffle<Func::ShuffleDown>,
2157 &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>,
2158
2159 &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>,
2160 &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>,
2161 &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>,
2162 &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>,
2163 }; 2162 };
2164 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 2163 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
2165 2164
@@ -2492,6 +2491,9 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {
2492 bvec2 is_nan2 = isnan(pair2); 2491 bvec2 is_nan2 = isnan(pair2);
2493 return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); 2492 return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
2494} 2493}
2494
2495const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
2496const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
2495)"; 2497)";
2496} 2498}
2497 2499
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 4bbd17b12..7646cbb0e 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -323,10 +323,12 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
323 // (e.g. handheld mode) on a 1920x1080 framebuffer. 323 // (e.g. handheld mode) on a 1920x1080 framebuffer.
324 f32 scale_u = 1.f, scale_v = 1.f; 324 f32 scale_u = 1.f, scale_v = 1.f;
325 if (framebuffer_crop_rect.GetWidth() > 0) { 325 if (framebuffer_crop_rect.GetWidth() > 0) {
326 scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / screen_info.texture.width; 326 scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
327 static_cast<f32>(screen_info.texture.width);
327 } 328 }
328 if (framebuffer_crop_rect.GetHeight() > 0) { 329 if (framebuffer_crop_rect.GetHeight() > 0) {
329 scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / screen_info.texture.height; 330 scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
331 static_cast<f32>(screen_info.texture.height);
330 } 332 }
331 333
332 std::array<ScreenRectVertex, 4> vertices = {{ 334 std::array<ScreenRectVertex, 4> vertices = {{
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 42cf068b6..2850d5b59 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -783,6 +783,11 @@ private:
783 return {}; 783 return {};
784 } 784 }
785 785
786 Id FSwizzleAdd(Operation operation) {
787 UNIMPLEMENTED();
788 return {};
789 }
790
786 Id HNegate(Operation operation) { 791 Id HNegate(Operation operation) {
787 UNIMPLEMENTED(); 792 UNIMPLEMENTED();
788 return {}; 793 return {};
@@ -1195,42 +1200,12 @@ private:
1195 return {}; 1200 return {};
1196 } 1201 }
1197 1202
1198 Id ShuffleIndexed(Operation) { 1203 Id ThreadId(Operation) {
1199 UNIMPLEMENTED();
1200 return {};
1201 }
1202
1203 Id ShuffleUp(Operation) {
1204 UNIMPLEMENTED();
1205 return {};
1206 }
1207
1208 Id ShuffleDown(Operation) {
1209 UNIMPLEMENTED();
1210 return {};
1211 }
1212
1213 Id ShuffleButterfly(Operation) {
1214 UNIMPLEMENTED();
1215 return {};
1216 }
1217
1218 Id InRangeShuffleIndexed(Operation) {
1219 UNIMPLEMENTED(); 1204 UNIMPLEMENTED();
1220 return {}; 1205 return {};
1221 } 1206 }
1222 1207
1223 Id InRangeShuffleUp(Operation) { 1208 Id ShuffleIndexed(Operation) {
1224 UNIMPLEMENTED();
1225 return {};
1226 }
1227
1228 Id InRangeShuffleDown(Operation) {
1229 UNIMPLEMENTED();
1230 return {};
1231 }
1232
1233 Id InRangeShuffleButterfly(Operation) {
1234 UNIMPLEMENTED(); 1209 UNIMPLEMENTED();
1235 return {}; 1210 return {};
1236 } 1211 }
@@ -1393,6 +1368,7 @@ private:
1393 &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>, 1368 &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>,
1394 &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>, 1369 &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>,
1395 &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>, 1370 &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>,
1371 &SPIRVDecompiler::FSwizzleAdd,
1396 1372
1397 &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>, 1373 &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>,
1398 &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>, 1374 &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>,
@@ -1528,15 +1504,8 @@ private:
1528 &SPIRVDecompiler::VoteAny, 1504 &SPIRVDecompiler::VoteAny,
1529 &SPIRVDecompiler::VoteEqual, 1505 &SPIRVDecompiler::VoteEqual,
1530 1506
1507 &SPIRVDecompiler::ThreadId,
1531 &SPIRVDecompiler::ShuffleIndexed, 1508 &SPIRVDecompiler::ShuffleIndexed,
1532 &SPIRVDecompiler::ShuffleUp,
1533 &SPIRVDecompiler::ShuffleDown,
1534 &SPIRVDecompiler::ShuffleButterfly,
1535
1536 &SPIRVDecompiler::InRangeShuffleIndexed,
1537 &SPIRVDecompiler::InRangeShuffleUp,
1538 &SPIRVDecompiler::InRangeShuffleDown,
1539 &SPIRVDecompiler::InRangeShuffleButterfly,
1540 }; 1509 };
1541 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 1510 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
1542 1511
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 21fb9cb83..22c3e5120 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -154,10 +154,10 @@ void ShaderIR::Decode() {
154 LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); 154 LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
155 [[fallthrough]]; 155 [[fallthrough]];
156 case CompileDepth::BruteForce: { 156 case CompileDepth::BruteForce: {
157 const auto shader_end = static_cast<u32>(program_code.size());
157 coverage_begin = main_offset; 158 coverage_begin = main_offset;
158 const std::size_t shader_end = program_code.size();
159 coverage_end = shader_end; 159 coverage_end = shader_end;
160 for (u32 label = main_offset; label < shader_end; label++) { 160 for (u32 label = main_offset; label < shader_end; ++label) {
161 basic_blocks.insert({label, DecodeRange(label, label + 1)}); 161 basic_blocks.insert({label, DecodeRange(label, label + 1)});
162 } 162 }
163 break; 163 break;
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
index fa8a250cc..d98d0e1dd 100644
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -17,6 +17,7 @@ using Tegra::Shader::ShuffleOperation;
17using Tegra::Shader::VoteOperation; 17using Tegra::Shader::VoteOperation;
18 18
19namespace { 19namespace {
20
20OperationCode GetOperationCode(VoteOperation vote_op) { 21OperationCode GetOperationCode(VoteOperation vote_op) {
21 switch (vote_op) { 22 switch (vote_op) {
22 case VoteOperation::All: 23 case VoteOperation::All:
@@ -30,6 +31,7 @@ OperationCode GetOperationCode(VoteOperation vote_op) {
30 return OperationCode::VoteAll; 31 return OperationCode::VoteAll;
31 } 32 }
32} 33}
34
33} // Anonymous namespace 35} // Anonymous namespace
34 36
35u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { 37u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
@@ -46,50 +48,59 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
46 break; 48 break;
47 } 49 }
48 case OpCode::Id::SHFL: { 50 case OpCode::Id::SHFL: {
49 Node width = [this, instr] { 51 Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
50 Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) 52 : GetRegister(instr.gpr39);
51 : GetRegister(instr.gpr39); 53 Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
52 54 : GetRegister(instr.gpr20);
53 // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has 55
54 // been done reversing Nvidia's math. It won't work on all cases due to SHFL having 56 Node thread_id = Operation(OperationCode::ThreadId);
55 // different parameters that don't properly map to GLSL's interface, but it should work 57 Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU));
56 // for cases emitted by Nvidia's compiler. 58 Node seg_mask = BitfieldExtract(mask, 8, 16);
57 if (instr.shfl.operation == ShuffleOperation::Up) {
58 return Operation(
59 OperationCode::ILogicalShiftRight,
60 Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)),
61 Immediate(8));
62 } else {
63 return Operation(OperationCode::ILogicalShiftRight,
64 Operation(OperationCode::IAdd, Immediate(0x201F),
65 Operation(OperationCode::INegate, std::move(mask))),
66 Immediate(8));
67 }
68 }();
69 59
70 const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> { 60 Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask);
61 Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask);
62 Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id,
63 Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask));
64
65 Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] {
71 switch (instr.shfl.operation) { 66 switch (instr.shfl.operation) {
72 case ShuffleOperation::Idx: 67 case ShuffleOperation::Idx:
73 return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed}; 68 return Operation(OperationCode::IBitwiseOr,
74 case ShuffleOperation::Up: 69 Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask),
75 return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp}; 70 min_thread_id);
76 case ShuffleOperation::Down: 71 case ShuffleOperation::Down:
77 return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown}; 72 return Operation(OperationCode::IAdd, thread_id, index);
73 case ShuffleOperation::Up:
74 return Operation(OperationCode::IAdd, thread_id,
75 Operation(OperationCode::INegate, index));
78 case ShuffleOperation::Bfly: 76 case ShuffleOperation::Bfly:
79 return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly}; 77 return Operation(OperationCode::IBitwiseXor, thread_id, index);
80 } 78 }
81 UNREACHABLE_MSG("Invalid SHFL operation: {}", 79 UNREACHABLE();
82 static_cast<u64>(instr.shfl.operation.Value())); 80 return Immediate(0U);
83 return {};
84 }(); 81 }();
85 82
86 // Setting the predicate before the register is intentional to avoid overwriting. 83 Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] {
87 Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) 84 if (instr.shfl.operation == ShuffleOperation::Up) {
88 : GetRegister(instr.gpr20); 85 return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id);
89 SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width)); 86 } else {
87 return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id);
88 }
89 }();
90
91 SetPredicate(bb, instr.shfl.pred48, in_bounds);
90 SetRegister( 92 SetRegister(
91 bb, instr.gpr0, 93 bb, instr.gpr0,
92 Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width))); 94 Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id));
95 break;
96 }
97 case OpCode::Id::FSWZADD: {
98 UNIMPLEMENTED_IF(instr.fswzadd.ndv);
99
100 Node op_a = GetRegister(instr.gpr8);
101 Node op_b = GetRegister(instr.gpr20);
102 Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle));
103 SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask));
93 break; 104 break;
94 } 105 }
95 default: 106 default:
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 4300d9ff4..54217e6a4 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -47,6 +47,7 @@ enum class OperationCode {
47 FTrunc, /// (MetaArithmetic, float a) -> float 47 FTrunc, /// (MetaArithmetic, float a) -> float
48 FCastInteger, /// (MetaArithmetic, int a) -> float 48 FCastInteger, /// (MetaArithmetic, int a) -> float
49 FCastUInteger, /// (MetaArithmetic, uint a) -> float 49 FCastUInteger, /// (MetaArithmetic, uint a) -> float
50 FSwizzleAdd, /// (float a, float b, uint mask) -> float
50 51
51 IAdd, /// (MetaArithmetic, int a, int b) -> int 52 IAdd, /// (MetaArithmetic, int a, int b) -> int
52 IMul, /// (MetaArithmetic, int a, int b) -> int 53 IMul, /// (MetaArithmetic, int a, int b) -> int
@@ -181,15 +182,8 @@ enum class OperationCode {
181 VoteAny, /// (bool) -> bool 182 VoteAny, /// (bool) -> bool
182 VoteEqual, /// (bool) -> bool 183 VoteEqual, /// (bool) -> bool
183 184
184 ShuffleIndexed, /// (uint value, uint index, uint width) -> uint 185 ThreadId, /// () -> uint
185 ShuffleUp, /// (uint value, uint index, uint width) -> uint 186 ShuffleIndexed, /// (uint value, uint index) -> uint
186 ShuffleDown, /// (uint value, uint index, uint width) -> uint
187 ShuffleButterfly, /// (uint value, uint index, uint width) -> uint
188
189 InRangeShuffleIndexed, /// (uint index, uint width) -> bool
190 InRangeShuffleUp, /// (uint index, uint width) -> bool
191 InRangeShuffleDown, /// (uint index, uint width) -> bool
192 InRangeShuffleButterfly, /// (uint index, uint width) -> bool
193 187
194 Amount, 188 Amount,
195}; 189};
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 26c8fde22..76a849818 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -49,7 +49,7 @@ public:
49 } 49 }
50 50
51 u32 GetSize() const { 51 u32 GetSize() const {
52 return max_offset + sizeof(float); 52 return max_offset + static_cast<u32>(sizeof(float));
53 } 53 }
54 54
55 u32 GetMaxOffset() const { 55 u32 GetMaxOffset() const {
@@ -165,8 +165,8 @@ public:
165 return program_manager.GetVariables(); 165 return program_manager.GetVariables();
166 } 166 }
167 167
168 u32 ConvertAddressToNvidiaSpace(const u32 address) const { 168 u32 ConvertAddressToNvidiaSpace(u32 address) const {
169 return (address - main_offset) * sizeof(Tegra::Shader::Instruction); 169 return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction));
170 } 170 }
171 171
172 /// Returns a condition code evaluated from internal flags 172 /// Returns a condition code evaluated from internal flags
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 58b608a36..33bd31865 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -92,11 +92,11 @@ private:
92 const unsigned int mask = 1 << m_NextBit++; 92 const unsigned int mask = 1 << m_NextBit++;
93 93
94 // clear the bit 94 // clear the bit
95 *m_CurByte &= ~mask; 95 *m_CurByte &= static_cast<unsigned char>(~mask);
96 96
97 // Write the bit, if necessary 97 // Write the bit, if necessary
98 if (b) 98 if (b)
99 *m_CurByte |= mask; 99 *m_CurByte |= static_cast<unsigned char>(mask);
100 100
101 // Next byte? 101 // Next byte?
102 if (m_NextBit >= 8) { 102 if (m_NextBit >= 8) {
@@ -137,7 +137,7 @@ public:
137 } 137 }
138 138
139 uint64_t mask = (1 << (end - start + 1)) - 1; 139 uint64_t mask = (1 << (end - start + 1)) - 1;
140 return (m_Bits >> start) & mask; 140 return (m_Bits >> start) & static_cast<IntType>(mask);
141 } 141 }
142 142
143private: 143private:
@@ -656,7 +656,7 @@ static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) {
656 return 0; 656 return 0;
657 if (toBit == 0) 657 if (toBit == 0)
658 return 0; 658 return 0;
659 IntType v = val & ((1 << numBits) - 1); 659 IntType v = val & static_cast<IntType>((1 << numBits) - 1);
660 IntType res = v; 660 IntType res = v;
661 uint32_t reslen = numBits; 661 uint32_t reslen = numBits;
662 while (reslen < toBit) { 662 while (reslen < toBit) {
@@ -666,8 +666,8 @@ static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) {
666 comp = numBits - newshift; 666 comp = numBits - newshift;
667 numBits = newshift; 667 numBits = newshift;
668 } 668 }
669 res <<= numBits; 669 res = static_cast<IntType>(res << numBits);
670 res |= v >> comp; 670 res = static_cast<IntType>(res | (v >> comp));
671 reslen += numBits; 671 reslen += numBits;
672 } 672 }
673 return res; 673 return res;
@@ -714,7 +714,7 @@ public:
714 // Do nothing 714 // Do nothing
715 return val; 715 return val;
716 } else if (oldDepth == 0 && newDepth != 0) { 716 } else if (oldDepth == 0 && newDepth != 0) {
717 return (1 << newDepth) - 1; 717 return static_cast<ChannelType>((1 << newDepth) - 1);
718 } else if (newDepth > oldDepth) { 718 } else if (newDepth > oldDepth) {
719 return Replicate(val, oldDepth, newDepth); 719 return Replicate(val, oldDepth, newDepth);
720 } else { 720 } else {
@@ -722,10 +722,11 @@ public:
722 if (newDepth == 0) { 722 if (newDepth == 0) {
723 return 0xFF; 723 return 0xFF;
724 } else { 724 } else {
725 uint8_t bitsWasted = oldDepth - newDepth; 725 uint8_t bitsWasted = static_cast<uint8_t>(oldDepth - newDepth);
726 uint16_t v = static_cast<uint16_t>(val); 726 uint16_t v = static_cast<uint16_t>(val);
727 v = (v + (1 << (bitsWasted - 1))) >> bitsWasted; 727 v = static_cast<uint16_t>((v + (1 << (bitsWasted - 1))) >> bitsWasted);
728 v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v), (1 << newDepth) - 1); 728 v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v),
729 static_cast<uint16_t>((1 << newDepth) - 1));
729 return static_cast<uint8_t>(v); 730 return static_cast<uint8_t>(v);
730 } 731 }
731 } 732 }
@@ -1191,18 +1192,18 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z,
1191 uint8_t seed11 = static_cast<uint8_t>((rnum >> 26) & 0xF); 1192 uint8_t seed11 = static_cast<uint8_t>((rnum >> 26) & 0xF);
1192 uint8_t seed12 = static_cast<uint8_t>(((rnum >> 30) | (rnum << 2)) & 0xF); 1193 uint8_t seed12 = static_cast<uint8_t>(((rnum >> 30) | (rnum << 2)) & 0xF);
1193 1194
1194 seed1 *= seed1; 1195 seed1 = static_cast<uint8_t>(seed1 * seed1);
1195 seed2 *= seed2; 1196 seed2 = static_cast<uint8_t>(seed2 * seed2);
1196 seed3 *= seed3; 1197 seed3 = static_cast<uint8_t>(seed3 * seed3);
1197 seed4 *= seed4; 1198 seed4 = static_cast<uint8_t>(seed4 * seed4);
1198 seed5 *= seed5; 1199 seed5 = static_cast<uint8_t>(seed5 * seed5);
1199 seed6 *= seed6; 1200 seed6 = static_cast<uint8_t>(seed6 * seed6);
1200 seed7 *= seed7; 1201 seed7 = static_cast<uint8_t>(seed7 * seed7);
1201 seed8 *= seed8; 1202 seed8 = static_cast<uint8_t>(seed8 * seed8);
1202 seed9 *= seed9; 1203 seed9 = static_cast<uint8_t>(seed9 * seed9);
1203 seed10 *= seed10; 1204 seed10 = static_cast<uint8_t>(seed10 * seed10);
1204 seed11 *= seed11; 1205 seed11 = static_cast<uint8_t>(seed11 * seed11);
1205 seed12 *= seed12; 1206 seed12 = static_cast<uint8_t>(seed12 * seed12);
1206 1207
1207 int32_t sh1, sh2, sh3; 1208 int32_t sh1, sh2, sh3;
1208 if (seed & 1) { 1209 if (seed & 1) {
@@ -1214,18 +1215,18 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z,
1214 } 1215 }
1215 sh3 = (seed & 0x10) ? sh1 : sh2; 1216 sh3 = (seed & 0x10) ? sh1 : sh2;
1216 1217
1217 seed1 >>= sh1; 1218 seed1 = static_cast<uint8_t>(seed1 >> sh1);
1218 seed2 >>= sh2; 1219 seed2 = static_cast<uint8_t>(seed2 >> sh2);
1219 seed3 >>= sh1; 1220 seed3 = static_cast<uint8_t>(seed3 >> sh1);
1220 seed4 >>= sh2; 1221 seed4 = static_cast<uint8_t>(seed4 >> sh2);
1221 seed5 >>= sh1; 1222 seed5 = static_cast<uint8_t>(seed5 >> sh1);
1222 seed6 >>= sh2; 1223 seed6 = static_cast<uint8_t>(seed6 >> sh2);
1223 seed7 >>= sh1; 1224 seed7 = static_cast<uint8_t>(seed7 >> sh1);
1224 seed8 >>= sh2; 1225 seed8 = static_cast<uint8_t>(seed8 >> sh2);
1225 seed9 >>= sh3; 1226 seed9 = static_cast<uint8_t>(seed9 >> sh3);
1226 seed10 >>= sh3; 1227 seed10 = static_cast<uint8_t>(seed10 >> sh3);
1227 seed11 >>= sh3; 1228 seed11 = static_cast<uint8_t>(seed11 >> sh3);
1228 seed12 >>= sh3; 1229 seed12 = static_cast<uint8_t>(seed12 >> sh3);
1229 1230
1230 int32_t a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); 1231 int32_t a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
1231 int32_t b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); 1232 int32_t b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
@@ -1558,7 +1559,9 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
1558 1559
1559 // Make sure that higher non-texel bits are set to zero 1560 // Make sure that higher non-texel bits are set to zero
1560 const uint32_t clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; 1561 const uint32_t clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
1561 texelWeightData[clearByteStart - 1] &= (1 << (weightParams.GetPackedBitSize() % 8)) - 1; 1562 texelWeightData[clearByteStart - 1] =
1563 texelWeightData[clearByteStart - 1] &
1564 static_cast<uint8_t>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
1562 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); 1565 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
1563 1566
1564 std::vector<IntegerEncodedValue> texelWeightValues; 1567 std::vector<IntegerEncodedValue> texelWeightValues;
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 27c8ce975..8e82c6748 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -342,13 +342,14 @@ struct TSCEntry {
342 float GetLodBias() const { 342 float GetLodBias() const {
343 // Sign extend the 13-bit value. 343 // Sign extend the 13-bit value.
344 constexpr u32 mask = 1U << (13 - 1); 344 constexpr u32 mask = 1U << (13 - 1);
345 return static_cast<s32>((mip_lod_bias ^ mask) - mask) / 256.0f; 345 return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f;
346 } 346 }
347 347
348 std::array<float, 4> GetBorderColor() const { 348 std::array<float, 4> GetBorderColor() const {
349 if (srgb_conversion) { 349 if (srgb_conversion) {
350 return {srgb_border_color_r / 255.0f, srgb_border_color_g / 255.0f, 350 return {static_cast<float>(srgb_border_color_r) / 255.0f,
351 srgb_border_color_b / 255.0f, border_color[3]}; 351 static_cast<float>(srgb_border_color_g) / 255.0f,
352 static_cast<float>(srgb_border_color_b) / 255.0f, border_color[3]};
352 } 353 }
353 return border_color; 354 return border_color;
354 } 355 }
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 60cda0ca3..8e947394c 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -28,7 +28,7 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system) {
28 28
29u16 GetResolutionScaleFactor(const RendererBase& renderer) { 29u16 GetResolutionScaleFactor(const RendererBase& renderer) {
30 return static_cast<u16>( 30 return static_cast<u16>(
31 Settings::values.resolution_factor 31 Settings::values.resolution_factor != 0
32 ? Settings::values.resolution_factor 32 ? Settings::values.resolution_factor
33 : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio()); 33 : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio());
34} 34}