diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/common/common_funcs.h | 2 | ||||
| -rw-r--r-- | src/common/hash.h | 35 | ||||
| -rw-r--r-- | src/core/file_sys/xts_archive.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/CMakeLists.txt | 6 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 22 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 19 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 76 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 49 | ||||
| -rw-r--r-- | src/video_core/shader/decode.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/shader/decode/warp.cpp | 79 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 12 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 6 | ||||
| -rw-r--r-- | src/video_core/textures/astc.cpp | 73 | ||||
| -rw-r--r-- | src/video_core/textures/texture.h | 7 | ||||
| -rw-r--r-- | src/video_core/video_core.cpp | 2 |
20 files changed, 197 insertions, 216 deletions
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index c029dc7b3..6dc3e108f 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h | |||
| @@ -56,7 +56,7 @@ std::string GetLastErrorMsg(); | |||
| 56 | namespace Common { | 56 | namespace Common { |
| 57 | 57 | ||
| 58 | constexpr u32 MakeMagic(char a, char b, char c, char d) { | 58 | constexpr u32 MakeMagic(char a, char b, char c, char d) { |
| 59 | return a | b << 8 | c << 16 | d << 24; | 59 | return u32(a) | u32(b) << 8 | u32(c) << 16 | u32(d) << 24; |
| 60 | } | 60 | } |
| 61 | 61 | ||
| 62 | } // namespace Common | 62 | } // namespace Common |
diff --git a/src/common/hash.h b/src/common/hash.h index ebd4125e2..b2538f3ea 100644 --- a/src/common/hash.h +++ b/src/common/hash.h | |||
| @@ -35,41 +35,6 @@ static inline u64 ComputeStructHash64(const T& data) { | |||
| 35 | return ComputeHash64(&data, sizeof(data)); | 35 | return ComputeHash64(&data, sizeof(data)); |
| 36 | } | 36 | } |
| 37 | 37 | ||
| 38 | /// A helper template that ensures the padding in a struct is initialized by memsetting to 0. | ||
| 39 | template <typename T> | ||
| 40 | struct HashableStruct { | ||
| 41 | // In addition to being trivially copyable, T must also have a trivial default constructor, | ||
| 42 | // because any member initialization would be overridden by memset | ||
| 43 | static_assert(std::is_trivial_v<T>, "Type passed to HashableStruct must be trivial"); | ||
| 44 | /* | ||
| 45 | * We use a union because "implicitly-defined copy/move constructor for a union X copies the | ||
| 46 | * object representation of X." and "implicitly-defined copy assignment operator for a union X | ||
| 47 | * copies the object representation (3.9) of X." = Bytewise copy instead of memberwise copy. | ||
| 48 | * This is important because the padding bytes are included in the hash and comparison between | ||
| 49 | * objects. | ||
| 50 | */ | ||
| 51 | union { | ||
| 52 | T state; | ||
| 53 | }; | ||
| 54 | |||
| 55 | HashableStruct() { | ||
| 56 | // Memset structure to zero padding bits, so that they will be deterministic when hashing | ||
| 57 | std::memset(&state, 0, sizeof(T)); | ||
| 58 | } | ||
| 59 | |||
| 60 | bool operator==(const HashableStruct<T>& o) const { | ||
| 61 | return std::memcmp(&state, &o.state, sizeof(T)) == 0; | ||
| 62 | }; | ||
| 63 | |||
| 64 | bool operator!=(const HashableStruct<T>& o) const { | ||
| 65 | return !(*this == o); | ||
| 66 | }; | ||
| 67 | |||
| 68 | std::size_t Hash() const { | ||
| 69 | return Common::ComputeStructHash64(state); | ||
| 70 | } | ||
| 71 | }; | ||
| 72 | |||
| 73 | struct PairHash { | 38 | struct PairHash { |
| 74 | template <class T1, class T2> | 39 | template <class T1, class T2> |
| 75 | std::size_t operator()(const std::pair<T1, T2>& pair) const noexcept { | 40 | std::size_t operator()(const std::pair<T1, T2>& pair) const noexcept { |
diff --git a/src/core/file_sys/xts_archive.cpp b/src/core/file_sys/xts_archive.cpp index 4bc5cb2ee..f5f8b91c9 100644 --- a/src/core/file_sys/xts_archive.cpp +++ b/src/core/file_sys/xts_archive.cpp | |||
| @@ -93,8 +93,7 @@ Loader::ResultStatus NAX::Parse(std::string_view path) { | |||
| 93 | std::size_t i = 0; | 93 | std::size_t i = 0; |
| 94 | for (; i < sd_keys.size(); ++i) { | 94 | for (; i < sd_keys.size(); ++i) { |
| 95 | std::array<Core::Crypto::Key128, 2> nax_keys{}; | 95 | std::array<Core::Crypto::Key128, 2> nax_keys{}; |
| 96 | if (!CalculateHMAC256(nax_keys.data(), sd_keys[i].data(), 0x10, std::string(path).c_str(), | 96 | if (!CalculateHMAC256(nax_keys.data(), sd_keys[i].data(), 0x10, path.data(), path.size())) { |
| 97 | path.size())) { | ||
| 98 | return Loader::ResultStatus::ErrorNAXKeyHMACFailed; | 97 | return Loader::ResultStatus::ErrorNAXKeyHMACFailed; |
| 99 | } | 98 | } |
| 100 | 99 | ||
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c911c6ec4..45d8eaf23 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -180,3 +180,9 @@ target_link_libraries(video_core PRIVATE glad) | |||
| 180 | if (ENABLE_VULKAN) | 180 | if (ENABLE_VULKAN) |
| 181 | target_link_libraries(video_core PRIVATE sirit) | 181 | target_link_libraries(video_core PRIVATE sirit) |
| 182 | endif() | 182 | endif() |
| 183 | |||
| 184 | if (MSVC) | ||
| 185 | target_compile_options(video_core PRIVATE /we4267) | ||
| 186 | else() | ||
| 187 | target_compile_options(video_core PRIVATE -Werror=conversion -Wno-error=sign-conversion) | ||
| 188 | endif() | ||
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 2bed6cb38..42ce49a4d 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -261,7 +261,8 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3 | |||
| 261 | executing_macro = 0; | 261 | executing_macro = 0; |
| 262 | 262 | ||
| 263 | // Lookup the macro offset | 263 | // Lookup the macro offset |
| 264 | const u32 entry = ((method - MacroRegistersStart) >> 1) % macro_positions.size(); | 264 | const u32 entry = |
| 265 | ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size()); | ||
| 265 | 266 | ||
| 266 | // Execute the current macro. | 267 | // Execute the current macro. |
| 267 | macro_interpreter.Execute(macro_positions[entry], num_parameters, parameters); | 268 | macro_interpreter.Execute(macro_positions[entry], num_parameters, parameters); |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 8f6bc76eb..9fafed4a2 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -616,6 +616,14 @@ union Instruction { | |||
| 616 | } shfl; | 616 | } shfl; |
| 617 | 617 | ||
| 618 | union { | 618 | union { |
| 619 | BitField<44, 1, u64> ftz; | ||
| 620 | BitField<39, 2, u64> tab5cb8_2; | ||
| 621 | BitField<38, 1, u64> ndv; | ||
| 622 | BitField<47, 1, u64> cc; | ||
| 623 | BitField<28, 8, u64> swizzle; | ||
| 624 | } fswzadd; | ||
| 625 | |||
| 626 | union { | ||
| 619 | BitField<8, 8, Register> gpr; | 627 | BitField<8, 8, Register> gpr; |
| 620 | BitField<20, 24, s64> offset; | 628 | BitField<20, 24, s64> offset; |
| 621 | } gmem; | 629 | } gmem; |
| @@ -1478,7 +1486,8 @@ union Instruction { | |||
| 1478 | u32 value = static_cast<u32>(target); | 1486 | u32 value = static_cast<u32>(target); |
| 1479 | // The branch offset is relative to the next instruction and is stored in bytes, so | 1487 | // The branch offset is relative to the next instruction and is stored in bytes, so |
| 1480 | // divide it by the size of an instruction and add 1 to it. | 1488 | // divide it by the size of an instruction and add 1 to it. |
| 1481 | return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1; | 1489 | return static_cast<s32>((value ^ mask) - mask) / static_cast<s32>(sizeof(Instruction)) + |
| 1490 | 1; | ||
| 1482 | } | 1491 | } |
| 1483 | } bra; | 1492 | } bra; |
| 1484 | 1493 | ||
| @@ -1492,7 +1501,8 @@ union Instruction { | |||
| 1492 | u32 value = static_cast<u32>(target); | 1501 | u32 value = static_cast<u32>(target); |
| 1493 | // The branch offset is relative to the next instruction and is stored in bytes, so | 1502 | // The branch offset is relative to the next instruction and is stored in bytes, so |
| 1494 | // divide it by the size of an instruction and add 1 to it. | 1503 | // divide it by the size of an instruction and add 1 to it. |
| 1495 | return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1; | 1504 | return static_cast<s32>((value ^ mask) - mask) / static_cast<s32>(sizeof(Instruction)) + |
| 1505 | 1; | ||
| 1496 | } | 1506 | } |
| 1497 | } brx; | 1507 | } brx; |
| 1498 | 1508 | ||
| @@ -1590,6 +1600,7 @@ public: | |||
| 1590 | DEPBAR, | 1600 | DEPBAR, |
| 1591 | VOTE, | 1601 | VOTE, |
| 1592 | SHFL, | 1602 | SHFL, |
| 1603 | FSWZADD, | ||
| 1593 | BFE_C, | 1604 | BFE_C, |
| 1594 | BFE_R, | 1605 | BFE_R, |
| 1595 | BFE_IMM, | 1606 | BFE_IMM, |
| @@ -1851,11 +1862,11 @@ private: | |||
| 1851 | const std::size_t bit_position = opcode_bitsize - i - 1; | 1862 | const std::size_t bit_position = opcode_bitsize - i - 1; |
| 1852 | switch (bitstring[i]) { | 1863 | switch (bitstring[i]) { |
| 1853 | case '0': | 1864 | case '0': |
| 1854 | mask |= 1 << bit_position; | 1865 | mask |= static_cast<u16>(1U << bit_position); |
| 1855 | break; | 1866 | break; |
| 1856 | case '1': | 1867 | case '1': |
| 1857 | expect |= 1 << bit_position; | 1868 | expect |= static_cast<u16>(1U << bit_position); |
| 1858 | mask |= 1 << bit_position; | 1869 | mask |= static_cast<u16>(1U << bit_position); |
| 1859 | break; | 1870 | break; |
| 1860 | default: | 1871 | default: |
| 1861 | // Ignore | 1872 | // Ignore |
| @@ -1888,6 +1899,7 @@ private: | |||
| 1888 | INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), | 1899 | INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), |
| 1889 | INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), | 1900 | INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), |
| 1890 | INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), | 1901 | INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), |
| 1902 | INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"), | ||
| 1891 | INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), | 1903 | INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), |
| 1892 | INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), | 1904 | INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), |
| 1893 | INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), | 1905 | INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index c65b24c69..b30d5be74 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -62,6 +62,7 @@ Device::Device() { | |||
| 62 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); | 62 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); |
| 63 | has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && | 63 | has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && |
| 64 | GLAD_GL_NV_shader_thread_shuffle; | 64 | GLAD_GL_NV_shader_thread_shuffle; |
| 65 | has_shader_ballot = GLAD_GL_ARB_shader_ballot; | ||
| 65 | has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; | 66 | has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; |
| 66 | has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); | 67 | has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); |
| 67 | has_variable_aoffi = TestVariableAoffi(); | 68 | has_variable_aoffi = TestVariableAoffi(); |
| @@ -79,6 +80,7 @@ Device::Device(std::nullptr_t) { | |||
| 79 | max_vertex_attributes = 16; | 80 | max_vertex_attributes = 16; |
| 80 | max_varyings = 15; | 81 | max_varyings = 15; |
| 81 | has_warp_intrinsics = true; | 82 | has_warp_intrinsics = true; |
| 83 | has_shader_ballot = true; | ||
| 82 | has_vertex_viewport_layer = true; | 84 | has_vertex_viewport_layer = true; |
| 83 | has_image_load_formatted = true; | 85 | has_image_load_formatted = true; |
| 84 | has_variable_aoffi = true; | 86 | has_variable_aoffi = true; |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index bf35bd0b6..6c86fe207 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -34,6 +34,10 @@ public: | |||
| 34 | return has_warp_intrinsics; | 34 | return has_warp_intrinsics; |
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | bool HasShaderBallot() const { | ||
| 38 | return has_shader_ballot; | ||
| 39 | } | ||
| 40 | |||
| 37 | bool HasVertexViewportLayer() const { | 41 | bool HasVertexViewportLayer() const { |
| 38 | return has_vertex_viewport_layer; | 42 | return has_vertex_viewport_layer; |
| 39 | } | 43 | } |
| @@ -68,6 +72,7 @@ private: | |||
| 68 | u32 max_vertex_attributes{}; | 72 | u32 max_vertex_attributes{}; |
| 69 | u32 max_varyings{}; | 73 | u32 max_varyings{}; |
| 70 | bool has_warp_intrinsics{}; | 74 | bool has_warp_intrinsics{}; |
| 75 | bool has_shader_ballot{}; | ||
| 71 | bool has_vertex_viewport_layer{}; | 76 | bool has_vertex_viewport_layer{}; |
| 72 | bool has_image_load_formatted{}; | 77 | bool has_image_load_formatted{}; |
| 73 | bool has_variable_aoffi{}; | 78 | bool has_variable_aoffi{}; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e560d70d5..e43ba9d6b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -375,7 +375,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() { | |||
| 375 | fbkey.color_attachments[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); | 375 | fbkey.color_attachments[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); |
| 376 | fbkey.colors[index] = std::move(color_surface); | 376 | fbkey.colors[index] = std::move(color_surface); |
| 377 | } | 377 | } |
| 378 | fbkey.colors_count = regs.rt_control.count; | 378 | fbkey.colors_count = static_cast<u16>(regs.rt_control.count); |
| 379 | 379 | ||
| 380 | if (depth_surface) { | 380 | if (depth_surface) { |
| 381 | // Assume that a surface will be written to if it is used as a framebuffer, even if | 381 | // Assume that a surface will be written to if it is used as a framebuffer, even if |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 35e5214a5..04a239a39 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -275,16 +275,25 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy | |||
| 275 | std::string source = fmt::format(R"(// {} | 275 | std::string source = fmt::format(R"(// {} |
| 276 | #version 430 core | 276 | #version 430 core |
| 277 | #extension GL_ARB_separate_shader_objects : enable | 277 | #extension GL_ARB_separate_shader_objects : enable |
| 278 | #extension GL_ARB_shader_viewport_layer_array : enable | ||
| 279 | #extension GL_EXT_shader_image_load_formatted : enable | ||
| 280 | #extension GL_NV_gpu_shader5 : enable | ||
| 281 | #extension GL_NV_shader_thread_group : enable | ||
| 282 | #extension GL_NV_shader_thread_shuffle : enable | ||
| 283 | )", | 278 | )", |
| 284 | GetShaderId(unique_identifier, program_type)); | 279 | GetShaderId(unique_identifier, program_type)); |
| 285 | if (is_compute) { | 280 | if (is_compute) { |
| 286 | source += "#extension GL_ARB_compute_variable_group_size : require\n"; | 281 | source += "#extension GL_ARB_compute_variable_group_size : require\n"; |
| 287 | } | 282 | } |
| 283 | if (device.HasShaderBallot()) { | ||
| 284 | source += "#extension GL_ARB_shader_ballot : require\n"; | ||
| 285 | } | ||
| 286 | if (device.HasVertexViewportLayer()) { | ||
| 287 | source += "#extension GL_ARB_shader_viewport_layer_array : require\n"; | ||
| 288 | } | ||
| 289 | if (device.HasImageLoadFormatted()) { | ||
| 290 | source += "#extension GL_EXT_shader_image_load_formatted : require\n"; | ||
| 291 | } | ||
| 292 | if (device.HasWarpIntrinsics()) { | ||
| 293 | source += "#extension GL_NV_gpu_shader5 : require\n" | ||
| 294 | "#extension GL_NV_shader_thread_group : require\n" | ||
| 295 | "#extension GL_NV_shader_thread_shuffle : require\n"; | ||
| 296 | } | ||
| 288 | source += '\n'; | 297 | source += '\n'; |
| 289 | 298 | ||
| 290 | if (!is_compute) { | 299 | if (!is_compute) { |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 0ce59a852..e56ed51de 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -1379,6 +1379,26 @@ private: | |||
| 1379 | return GenerateUnary(operation, "float", Type::Float, type); | 1379 | return GenerateUnary(operation, "float", Type::Float, type); |
| 1380 | } | 1380 | } |
| 1381 | 1381 | ||
| 1382 | Expression FSwizzleAdd(Operation operation) { | ||
| 1383 | const std::string op_a = VisitOperand(operation, 0).AsFloat(); | ||
| 1384 | const std::string op_b = VisitOperand(operation, 1).AsFloat(); | ||
| 1385 | |||
| 1386 | if (!device.HasShaderBallot()) { | ||
| 1387 | LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); | ||
| 1388 | return {fmt::format("{} + {}", op_a, op_b), Type::Float}; | ||
| 1389 | } | ||
| 1390 | |||
| 1391 | const std::string instr_mask = VisitOperand(operation, 2).AsUint(); | ||
| 1392 | const std::string mask = code.GenerateTemporary(); | ||
| 1393 | code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask, | ||
| 1394 | instr_mask); | ||
| 1395 | |||
| 1396 | const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask); | ||
| 1397 | const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask); | ||
| 1398 | return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b), | ||
| 1399 | Type::Float}; | ||
| 1400 | } | ||
| 1401 | |||
| 1382 | Expression ICastFloat(Operation operation) { | 1402 | Expression ICastFloat(Operation operation) { |
| 1383 | return GenerateUnary(operation, "int", Type::Int, Type::Float); | 1403 | return GenerateUnary(operation, "int", Type::Int, Type::Float); |
| 1384 | } | 1404 | } |
| @@ -1942,34 +1962,24 @@ private: | |||
| 1942 | return Vote(operation, "allThreadsEqualNV"); | 1962 | return Vote(operation, "allThreadsEqualNV"); |
| 1943 | } | 1963 | } |
| 1944 | 1964 | ||
| 1945 | template <const std::string_view& func> | 1965 | Expression ThreadId(Operation operation) { |
| 1946 | Expression Shuffle(Operation operation) { | 1966 | if (!device.HasShaderBallot()) { |
| 1947 | const std::string value = VisitOperand(operation, 0).AsFloat(); | 1967 | LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); |
| 1948 | if (!device.HasWarpIntrinsics()) { | 1968 | return {"0U", Type::Uint}; |
| 1949 | LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader"); | ||
| 1950 | // On a "single-thread" device we are either on the same thread or out of bounds. Both | ||
| 1951 | // cases return the passed value. | ||
| 1952 | return {value, Type::Float}; | ||
| 1953 | } | 1969 | } |
| 1954 | 1970 | return {"gl_SubGroupInvocationARB", Type::Uint}; | |
| 1955 | const std::string index = VisitOperand(operation, 1).AsUint(); | ||
| 1956 | const std::string width = VisitOperand(operation, 2).AsUint(); | ||
| 1957 | return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float}; | ||
| 1958 | } | 1971 | } |
| 1959 | 1972 | ||
| 1960 | template <const std::string_view& func> | 1973 | Expression ShuffleIndexed(Operation operation) { |
| 1961 | Expression InRangeShuffle(Operation operation) { | 1974 | std::string value = VisitOperand(operation, 0).AsFloat(); |
| 1962 | const std::string index = VisitOperand(operation, 0).AsUint(); | 1975 | |
| 1963 | const std::string width = VisitOperand(operation, 1).AsUint(); | 1976 | if (!device.HasShaderBallot()) { |
| 1964 | if (!device.HasWarpIntrinsics()) { | 1977 | LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader"); |
| 1965 | // On a "single-thread" device we are only in bounds when the requested index is 0. | 1978 | return {std::move(value), Type::Float}; |
| 1966 | return {fmt::format("({} == 0U)", index), Type::Bool}; | ||
| 1967 | } | 1979 | } |
| 1968 | 1980 | ||
| 1969 | const std::string in_range = code.GenerateTemporary(); | 1981 | const std::string index = VisitOperand(operation, 1).AsUint(); |
| 1970 | code.AddLine("bool {};", in_range); | 1982 | return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; |
| 1971 | code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range); | ||
| 1972 | return {in_range, Type::Bool}; | ||
| 1973 | } | 1983 | } |
| 1974 | 1984 | ||
| 1975 | struct Func final { | 1985 | struct Func final { |
| @@ -1981,11 +1991,6 @@ private: | |||
| 1981 | static constexpr std::string_view Or = "Or"; | 1991 | static constexpr std::string_view Or = "Or"; |
| 1982 | static constexpr std::string_view Xor = "Xor"; | 1992 | static constexpr std::string_view Xor = "Xor"; |
| 1983 | static constexpr std::string_view Exchange = "Exchange"; | 1993 | static constexpr std::string_view Exchange = "Exchange"; |
| 1984 | |||
| 1985 | static constexpr std::string_view ShuffleIndexed = "shuffleNV"; | ||
| 1986 | static constexpr std::string_view ShuffleUp = "shuffleUpNV"; | ||
| 1987 | static constexpr std::string_view ShuffleDown = "shuffleDownNV"; | ||
| 1988 | static constexpr std::string_view ShuffleButterfly = "shuffleXorNV"; | ||
| 1989 | }; | 1994 | }; |
| 1990 | 1995 | ||
| 1991 | static constexpr std::array operation_decompilers = { | 1996 | static constexpr std::array operation_decompilers = { |
| @@ -2016,6 +2021,7 @@ private: | |||
| 2016 | &GLSLDecompiler::FTrunc, | 2021 | &GLSLDecompiler::FTrunc, |
| 2017 | &GLSLDecompiler::FCastInteger<Type::Int>, | 2022 | &GLSLDecompiler::FCastInteger<Type::Int>, |
| 2018 | &GLSLDecompiler::FCastInteger<Type::Uint>, | 2023 | &GLSLDecompiler::FCastInteger<Type::Uint>, |
| 2024 | &GLSLDecompiler::FSwizzleAdd, | ||
| 2019 | 2025 | ||
| 2020 | &GLSLDecompiler::Add<Type::Int>, | 2026 | &GLSLDecompiler::Add<Type::Int>, |
| 2021 | &GLSLDecompiler::Mul<Type::Int>, | 2027 | &GLSLDecompiler::Mul<Type::Int>, |
| @@ -2151,15 +2157,8 @@ private: | |||
| 2151 | &GLSLDecompiler::VoteAny, | 2157 | &GLSLDecompiler::VoteAny, |
| 2152 | &GLSLDecompiler::VoteEqual, | 2158 | &GLSLDecompiler::VoteEqual, |
| 2153 | 2159 | ||
| 2154 | &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>, | 2160 | &GLSLDecompiler::ThreadId, |
| 2155 | &GLSLDecompiler::Shuffle<Func::ShuffleUp>, | 2161 | &GLSLDecompiler::ShuffleIndexed, |
| 2156 | &GLSLDecompiler::Shuffle<Func::ShuffleDown>, | ||
| 2157 | &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>, | ||
| 2158 | |||
| 2159 | &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>, | ||
| 2160 | &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>, | ||
| 2161 | &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>, | ||
| 2162 | &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>, | ||
| 2163 | }; | 2162 | }; |
| 2164 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 2163 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
| 2165 | 2164 | ||
| @@ -2492,6 +2491,9 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) { | |||
| 2492 | bvec2 is_nan2 = isnan(pair2); | 2491 | bvec2 is_nan2 = isnan(pair2); |
| 2493 | return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); | 2492 | return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); |
| 2494 | } | 2493 | } |
| 2494 | |||
| 2495 | const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); | ||
| 2496 | const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); | ||
| 2495 | )"; | 2497 | )"; |
| 2496 | } | 2498 | } |
| 2497 | 2499 | ||
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 4bbd17b12..7646cbb0e 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -323,10 +323,12 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, | |||
| 323 | // (e.g. handheld mode) on a 1920x1080 framebuffer. | 323 | // (e.g. handheld mode) on a 1920x1080 framebuffer. |
| 324 | f32 scale_u = 1.f, scale_v = 1.f; | 324 | f32 scale_u = 1.f, scale_v = 1.f; |
| 325 | if (framebuffer_crop_rect.GetWidth() > 0) { | 325 | if (framebuffer_crop_rect.GetWidth() > 0) { |
| 326 | scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / screen_info.texture.width; | 326 | scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / |
| 327 | static_cast<f32>(screen_info.texture.width); | ||
| 327 | } | 328 | } |
| 328 | if (framebuffer_crop_rect.GetHeight() > 0) { | 329 | if (framebuffer_crop_rect.GetHeight() > 0) { |
| 329 | scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / screen_info.texture.height; | 330 | scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / |
| 331 | static_cast<f32>(screen_info.texture.height); | ||
| 330 | } | 332 | } |
| 331 | 333 | ||
| 332 | std::array<ScreenRectVertex, 4> vertices = {{ | 334 | std::array<ScreenRectVertex, 4> vertices = {{ |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 42cf068b6..2850d5b59 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -783,6 +783,11 @@ private: | |||
| 783 | return {}; | 783 | return {}; |
| 784 | } | 784 | } |
| 785 | 785 | ||
| 786 | Id FSwizzleAdd(Operation operation) { | ||
| 787 | UNIMPLEMENTED(); | ||
| 788 | return {}; | ||
| 789 | } | ||
| 790 | |||
| 786 | Id HNegate(Operation operation) { | 791 | Id HNegate(Operation operation) { |
| 787 | UNIMPLEMENTED(); | 792 | UNIMPLEMENTED(); |
| 788 | return {}; | 793 | return {}; |
| @@ -1195,42 +1200,12 @@ private: | |||
| 1195 | return {}; | 1200 | return {}; |
| 1196 | } | 1201 | } |
| 1197 | 1202 | ||
| 1198 | Id ShuffleIndexed(Operation) { | 1203 | Id ThreadId(Operation) { |
| 1199 | UNIMPLEMENTED(); | ||
| 1200 | return {}; | ||
| 1201 | } | ||
| 1202 | |||
| 1203 | Id ShuffleUp(Operation) { | ||
| 1204 | UNIMPLEMENTED(); | ||
| 1205 | return {}; | ||
| 1206 | } | ||
| 1207 | |||
| 1208 | Id ShuffleDown(Operation) { | ||
| 1209 | UNIMPLEMENTED(); | ||
| 1210 | return {}; | ||
| 1211 | } | ||
| 1212 | |||
| 1213 | Id ShuffleButterfly(Operation) { | ||
| 1214 | UNIMPLEMENTED(); | ||
| 1215 | return {}; | ||
| 1216 | } | ||
| 1217 | |||
| 1218 | Id InRangeShuffleIndexed(Operation) { | ||
| 1219 | UNIMPLEMENTED(); | 1204 | UNIMPLEMENTED(); |
| 1220 | return {}; | 1205 | return {}; |
| 1221 | } | 1206 | } |
| 1222 | 1207 | ||
| 1223 | Id InRangeShuffleUp(Operation) { | 1208 | Id ShuffleIndexed(Operation) { |
| 1224 | UNIMPLEMENTED(); | ||
| 1225 | return {}; | ||
| 1226 | } | ||
| 1227 | |||
| 1228 | Id InRangeShuffleDown(Operation) { | ||
| 1229 | UNIMPLEMENTED(); | ||
| 1230 | return {}; | ||
| 1231 | } | ||
| 1232 | |||
| 1233 | Id InRangeShuffleButterfly(Operation) { | ||
| 1234 | UNIMPLEMENTED(); | 1209 | UNIMPLEMENTED(); |
| 1235 | return {}; | 1210 | return {}; |
| 1236 | } | 1211 | } |
| @@ -1393,6 +1368,7 @@ private: | |||
| 1393 | &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>, | 1368 | &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>, |
| 1394 | &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>, | 1369 | &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>, |
| 1395 | &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>, | 1370 | &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>, |
| 1371 | &SPIRVDecompiler::FSwizzleAdd, | ||
| 1396 | 1372 | ||
| 1397 | &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>, | 1373 | &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>, |
| 1398 | &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>, | 1374 | &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>, |
| @@ -1528,15 +1504,8 @@ private: | |||
| 1528 | &SPIRVDecompiler::VoteAny, | 1504 | &SPIRVDecompiler::VoteAny, |
| 1529 | &SPIRVDecompiler::VoteEqual, | 1505 | &SPIRVDecompiler::VoteEqual, |
| 1530 | 1506 | ||
| 1507 | &SPIRVDecompiler::ThreadId, | ||
| 1531 | &SPIRVDecompiler::ShuffleIndexed, | 1508 | &SPIRVDecompiler::ShuffleIndexed, |
| 1532 | &SPIRVDecompiler::ShuffleUp, | ||
| 1533 | &SPIRVDecompiler::ShuffleDown, | ||
| 1534 | &SPIRVDecompiler::ShuffleButterfly, | ||
| 1535 | |||
| 1536 | &SPIRVDecompiler::InRangeShuffleIndexed, | ||
| 1537 | &SPIRVDecompiler::InRangeShuffleUp, | ||
| 1538 | &SPIRVDecompiler::InRangeShuffleDown, | ||
| 1539 | &SPIRVDecompiler::InRangeShuffleButterfly, | ||
| 1540 | }; | 1509 | }; |
| 1541 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 1510 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
| 1542 | 1511 | ||
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 21fb9cb83..22c3e5120 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -154,10 +154,10 @@ void ShaderIR::Decode() { | |||
| 154 | LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); | 154 | LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); |
| 155 | [[fallthrough]]; | 155 | [[fallthrough]]; |
| 156 | case CompileDepth::BruteForce: { | 156 | case CompileDepth::BruteForce: { |
| 157 | const auto shader_end = static_cast<u32>(program_code.size()); | ||
| 157 | coverage_begin = main_offset; | 158 | coverage_begin = main_offset; |
| 158 | const std::size_t shader_end = program_code.size(); | ||
| 159 | coverage_end = shader_end; | 159 | coverage_end = shader_end; |
| 160 | for (u32 label = main_offset; label < shader_end; label++) { | 160 | for (u32 label = main_offset; label < shader_end; ++label) { |
| 161 | basic_blocks.insert({label, DecodeRange(label, label + 1)}); | 161 | basic_blocks.insert({label, DecodeRange(label, label + 1)}); |
| 162 | } | 162 | } |
| 163 | break; | 163 | break; |
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp index fa8a250cc..d98d0e1dd 100644 --- a/src/video_core/shader/decode/warp.cpp +++ b/src/video_core/shader/decode/warp.cpp | |||
| @@ -17,6 +17,7 @@ using Tegra::Shader::ShuffleOperation; | |||
| 17 | using Tegra::Shader::VoteOperation; | 17 | using Tegra::Shader::VoteOperation; |
| 18 | 18 | ||
| 19 | namespace { | 19 | namespace { |
| 20 | |||
| 20 | OperationCode GetOperationCode(VoteOperation vote_op) { | 21 | OperationCode GetOperationCode(VoteOperation vote_op) { |
| 21 | switch (vote_op) { | 22 | switch (vote_op) { |
| 22 | case VoteOperation::All: | 23 | case VoteOperation::All: |
| @@ -30,6 +31,7 @@ OperationCode GetOperationCode(VoteOperation vote_op) { | |||
| 30 | return OperationCode::VoteAll; | 31 | return OperationCode::VoteAll; |
| 31 | } | 32 | } |
| 32 | } | 33 | } |
| 34 | |||
| 33 | } // Anonymous namespace | 35 | } // Anonymous namespace |
| 34 | 36 | ||
| 35 | u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { | 37 | u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { |
| @@ -46,50 +48,59 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { | |||
| 46 | break; | 48 | break; |
| 47 | } | 49 | } |
| 48 | case OpCode::Id::SHFL: { | 50 | case OpCode::Id::SHFL: { |
| 49 | Node width = [this, instr] { | 51 | Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) |
| 50 | Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) | 52 | : GetRegister(instr.gpr39); |
| 51 | : GetRegister(instr.gpr39); | 53 | Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) |
| 52 | 54 | : GetRegister(instr.gpr20); | |
| 53 | // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has | 55 | |
| 54 | // been done reversing Nvidia's math. It won't work on all cases due to SHFL having | 56 | Node thread_id = Operation(OperationCode::ThreadId); |
| 55 | // different parameters that don't properly map to GLSL's interface, but it should work | 57 | Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); |
| 56 | // for cases emitted by Nvidia's compiler. | 58 | Node seg_mask = BitfieldExtract(mask, 8, 16); |
| 57 | if (instr.shfl.operation == ShuffleOperation::Up) { | ||
| 58 | return Operation( | ||
| 59 | OperationCode::ILogicalShiftRight, | ||
| 60 | Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)), | ||
| 61 | Immediate(8)); | ||
| 62 | } else { | ||
| 63 | return Operation(OperationCode::ILogicalShiftRight, | ||
| 64 | Operation(OperationCode::IAdd, Immediate(0x201F), | ||
| 65 | Operation(OperationCode::INegate, std::move(mask))), | ||
| 66 | Immediate(8)); | ||
| 67 | } | ||
| 68 | }(); | ||
| 69 | 59 | ||
| 70 | const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> { | 60 | Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); |
| 61 | Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); | ||
| 62 | Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, | ||
| 63 | Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); | ||
| 64 | |||
| 65 | Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] { | ||
| 71 | switch (instr.shfl.operation) { | 66 | switch (instr.shfl.operation) { |
| 72 | case ShuffleOperation::Idx: | 67 | case ShuffleOperation::Idx: |
| 73 | return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed}; | 68 | return Operation(OperationCode::IBitwiseOr, |
| 74 | case ShuffleOperation::Up: | 69 | Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), |
| 75 | return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp}; | 70 | min_thread_id); |
| 76 | case ShuffleOperation::Down: | 71 | case ShuffleOperation::Down: |
| 77 | return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown}; | 72 | return Operation(OperationCode::IAdd, thread_id, index); |
| 73 | case ShuffleOperation::Up: | ||
| 74 | return Operation(OperationCode::IAdd, thread_id, | ||
| 75 | Operation(OperationCode::INegate, index)); | ||
| 78 | case ShuffleOperation::Bfly: | 76 | case ShuffleOperation::Bfly: |
| 79 | return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly}; | 77 | return Operation(OperationCode::IBitwiseXor, thread_id, index); |
| 80 | } | 78 | } |
| 81 | UNREACHABLE_MSG("Invalid SHFL operation: {}", | 79 | UNREACHABLE(); |
| 82 | static_cast<u64>(instr.shfl.operation.Value())); | 80 | return Immediate(0U); |
| 83 | return {}; | ||
| 84 | }(); | 81 | }(); |
| 85 | 82 | ||
| 86 | // Setting the predicate before the register is intentional to avoid overwriting. | 83 | Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { |
| 87 | Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) | 84 | if (instr.shfl.operation == ShuffleOperation::Up) { |
| 88 | : GetRegister(instr.gpr20); | 85 | return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); |
| 89 | SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width)); | 86 | } else { |
| 87 | return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); | ||
| 88 | } | ||
| 89 | }(); | ||
| 90 | |||
| 91 | SetPredicate(bb, instr.shfl.pred48, in_bounds); | ||
| 90 | SetRegister( | 92 | SetRegister( |
| 91 | bb, instr.gpr0, | 93 | bb, instr.gpr0, |
| 92 | Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width))); | 94 | Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); |
| 95 | break; | ||
| 96 | } | ||
| 97 | case OpCode::Id::FSWZADD: { | ||
| 98 | UNIMPLEMENTED_IF(instr.fswzadd.ndv); | ||
| 99 | |||
| 100 | Node op_a = GetRegister(instr.gpr8); | ||
| 101 | Node op_b = GetRegister(instr.gpr20); | ||
| 102 | Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle)); | ||
| 103 | SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask)); | ||
| 93 | break; | 104 | break; |
| 94 | } | 105 | } |
| 95 | default: | 106 | default: |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 4300d9ff4..54217e6a4 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -47,6 +47,7 @@ enum class OperationCode { | |||
| 47 | FTrunc, /// (MetaArithmetic, float a) -> float | 47 | FTrunc, /// (MetaArithmetic, float a) -> float |
| 48 | FCastInteger, /// (MetaArithmetic, int a) -> float | 48 | FCastInteger, /// (MetaArithmetic, int a) -> float |
| 49 | FCastUInteger, /// (MetaArithmetic, uint a) -> float | 49 | FCastUInteger, /// (MetaArithmetic, uint a) -> float |
| 50 | FSwizzleAdd, /// (float a, float b, uint mask) -> float | ||
| 50 | 51 | ||
| 51 | IAdd, /// (MetaArithmetic, int a, int b) -> int | 52 | IAdd, /// (MetaArithmetic, int a, int b) -> int |
| 52 | IMul, /// (MetaArithmetic, int a, int b) -> int | 53 | IMul, /// (MetaArithmetic, int a, int b) -> int |
| @@ -181,15 +182,8 @@ enum class OperationCode { | |||
| 181 | VoteAny, /// (bool) -> bool | 182 | VoteAny, /// (bool) -> bool |
| 182 | VoteEqual, /// (bool) -> bool | 183 | VoteEqual, /// (bool) -> bool |
| 183 | 184 | ||
| 184 | ShuffleIndexed, /// (uint value, uint index, uint width) -> uint | 185 | ThreadId, /// () -> uint |
| 185 | ShuffleUp, /// (uint value, uint index, uint width) -> uint | 186 | ShuffleIndexed, /// (uint value, uint index) -> uint |
| 186 | ShuffleDown, /// (uint value, uint index, uint width) -> uint | ||
| 187 | ShuffleButterfly, /// (uint value, uint index, uint width) -> uint | ||
| 188 | |||
| 189 | InRangeShuffleIndexed, /// (uint index, uint width) -> bool | ||
| 190 | InRangeShuffleUp, /// (uint index, uint width) -> bool | ||
| 191 | InRangeShuffleDown, /// (uint index, uint width) -> bool | ||
| 192 | InRangeShuffleButterfly, /// (uint index, uint width) -> bool | ||
| 193 | 187 | ||
| 194 | Amount, | 188 | Amount, |
| 195 | }; | 189 | }; |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 26c8fde22..76a849818 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -49,7 +49,7 @@ public: | |||
| 49 | } | 49 | } |
| 50 | 50 | ||
| 51 | u32 GetSize() const { | 51 | u32 GetSize() const { |
| 52 | return max_offset + sizeof(float); | 52 | return max_offset + static_cast<u32>(sizeof(float)); |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | u32 GetMaxOffset() const { | 55 | u32 GetMaxOffset() const { |
| @@ -165,8 +165,8 @@ public: | |||
| 165 | return program_manager.GetVariables(); | 165 | return program_manager.GetVariables(); |
| 166 | } | 166 | } |
| 167 | 167 | ||
| 168 | u32 ConvertAddressToNvidiaSpace(const u32 address) const { | 168 | u32 ConvertAddressToNvidiaSpace(u32 address) const { |
| 169 | return (address - main_offset) * sizeof(Tegra::Shader::Instruction); | 169 | return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction)); |
| 170 | } | 170 | } |
| 171 | 171 | ||
| 172 | /// Returns a condition code evaluated from internal flags | 172 | /// Returns a condition code evaluated from internal flags |
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 58b608a36..33bd31865 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp | |||
| @@ -92,11 +92,11 @@ private: | |||
| 92 | const unsigned int mask = 1 << m_NextBit++; | 92 | const unsigned int mask = 1 << m_NextBit++; |
| 93 | 93 | ||
| 94 | // clear the bit | 94 | // clear the bit |
| 95 | *m_CurByte &= ~mask; | 95 | *m_CurByte &= static_cast<unsigned char>(~mask); |
| 96 | 96 | ||
| 97 | // Write the bit, if necessary | 97 | // Write the bit, if necessary |
| 98 | if (b) | 98 | if (b) |
| 99 | *m_CurByte |= mask; | 99 | *m_CurByte |= static_cast<unsigned char>(mask); |
| 100 | 100 | ||
| 101 | // Next byte? | 101 | // Next byte? |
| 102 | if (m_NextBit >= 8) { | 102 | if (m_NextBit >= 8) { |
| @@ -137,7 +137,7 @@ public: | |||
| 137 | } | 137 | } |
| 138 | 138 | ||
| 139 | uint64_t mask = (1 << (end - start + 1)) - 1; | 139 | uint64_t mask = (1 << (end - start + 1)) - 1; |
| 140 | return (m_Bits >> start) & mask; | 140 | return (m_Bits >> start) & static_cast<IntType>(mask); |
| 141 | } | 141 | } |
| 142 | 142 | ||
| 143 | private: | 143 | private: |
| @@ -656,7 +656,7 @@ static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) { | |||
| 656 | return 0; | 656 | return 0; |
| 657 | if (toBit == 0) | 657 | if (toBit == 0) |
| 658 | return 0; | 658 | return 0; |
| 659 | IntType v = val & ((1 << numBits) - 1); | 659 | IntType v = val & static_cast<IntType>((1 << numBits) - 1); |
| 660 | IntType res = v; | 660 | IntType res = v; |
| 661 | uint32_t reslen = numBits; | 661 | uint32_t reslen = numBits; |
| 662 | while (reslen < toBit) { | 662 | while (reslen < toBit) { |
| @@ -666,8 +666,8 @@ static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) { | |||
| 666 | comp = numBits - newshift; | 666 | comp = numBits - newshift; |
| 667 | numBits = newshift; | 667 | numBits = newshift; |
| 668 | } | 668 | } |
| 669 | res <<= numBits; | 669 | res = static_cast<IntType>(res << numBits); |
| 670 | res |= v >> comp; | 670 | res = static_cast<IntType>(res | (v >> comp)); |
| 671 | reslen += numBits; | 671 | reslen += numBits; |
| 672 | } | 672 | } |
| 673 | return res; | 673 | return res; |
| @@ -714,7 +714,7 @@ public: | |||
| 714 | // Do nothing | 714 | // Do nothing |
| 715 | return val; | 715 | return val; |
| 716 | } else if (oldDepth == 0 && newDepth != 0) { | 716 | } else if (oldDepth == 0 && newDepth != 0) { |
| 717 | return (1 << newDepth) - 1; | 717 | return static_cast<ChannelType>((1 << newDepth) - 1); |
| 718 | } else if (newDepth > oldDepth) { | 718 | } else if (newDepth > oldDepth) { |
| 719 | return Replicate(val, oldDepth, newDepth); | 719 | return Replicate(val, oldDepth, newDepth); |
| 720 | } else { | 720 | } else { |
| @@ -722,10 +722,11 @@ public: | |||
| 722 | if (newDepth == 0) { | 722 | if (newDepth == 0) { |
| 723 | return 0xFF; | 723 | return 0xFF; |
| 724 | } else { | 724 | } else { |
| 725 | uint8_t bitsWasted = oldDepth - newDepth; | 725 | uint8_t bitsWasted = static_cast<uint8_t>(oldDepth - newDepth); |
| 726 | uint16_t v = static_cast<uint16_t>(val); | 726 | uint16_t v = static_cast<uint16_t>(val); |
| 727 | v = (v + (1 << (bitsWasted - 1))) >> bitsWasted; | 727 | v = static_cast<uint16_t>((v + (1 << (bitsWasted - 1))) >> bitsWasted); |
| 728 | v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v), (1 << newDepth) - 1); | 728 | v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v), |
| 729 | static_cast<uint16_t>((1 << newDepth) - 1)); | ||
| 729 | return static_cast<uint8_t>(v); | 730 | return static_cast<uint8_t>(v); |
| 730 | } | 731 | } |
| 731 | } | 732 | } |
| @@ -1191,18 +1192,18 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, | |||
| 1191 | uint8_t seed11 = static_cast<uint8_t>((rnum >> 26) & 0xF); | 1192 | uint8_t seed11 = static_cast<uint8_t>((rnum >> 26) & 0xF); |
| 1192 | uint8_t seed12 = static_cast<uint8_t>(((rnum >> 30) | (rnum << 2)) & 0xF); | 1193 | uint8_t seed12 = static_cast<uint8_t>(((rnum >> 30) | (rnum << 2)) & 0xF); |
| 1193 | 1194 | ||
| 1194 | seed1 *= seed1; | 1195 | seed1 = static_cast<uint8_t>(seed1 * seed1); |
| 1195 | seed2 *= seed2; | 1196 | seed2 = static_cast<uint8_t>(seed2 * seed2); |
| 1196 | seed3 *= seed3; | 1197 | seed3 = static_cast<uint8_t>(seed3 * seed3); |
| 1197 | seed4 *= seed4; | 1198 | seed4 = static_cast<uint8_t>(seed4 * seed4); |
| 1198 | seed5 *= seed5; | 1199 | seed5 = static_cast<uint8_t>(seed5 * seed5); |
| 1199 | seed6 *= seed6; | 1200 | seed6 = static_cast<uint8_t>(seed6 * seed6); |
| 1200 | seed7 *= seed7; | 1201 | seed7 = static_cast<uint8_t>(seed7 * seed7); |
| 1201 | seed8 *= seed8; | 1202 | seed8 = static_cast<uint8_t>(seed8 * seed8); |
| 1202 | seed9 *= seed9; | 1203 | seed9 = static_cast<uint8_t>(seed9 * seed9); |
| 1203 | seed10 *= seed10; | 1204 | seed10 = static_cast<uint8_t>(seed10 * seed10); |
| 1204 | seed11 *= seed11; | 1205 | seed11 = static_cast<uint8_t>(seed11 * seed11); |
| 1205 | seed12 *= seed12; | 1206 | seed12 = static_cast<uint8_t>(seed12 * seed12); |
| 1206 | 1207 | ||
| 1207 | int32_t sh1, sh2, sh3; | 1208 | int32_t sh1, sh2, sh3; |
| 1208 | if (seed & 1) { | 1209 | if (seed & 1) { |
| @@ -1214,18 +1215,18 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, | |||
| 1214 | } | 1215 | } |
| 1215 | sh3 = (seed & 0x10) ? sh1 : sh2; | 1216 | sh3 = (seed & 0x10) ? sh1 : sh2; |
| 1216 | 1217 | ||
| 1217 | seed1 >>= sh1; | 1218 | seed1 = static_cast<uint8_t>(seed1 >> sh1); |
| 1218 | seed2 >>= sh2; | 1219 | seed2 = static_cast<uint8_t>(seed2 >> sh2); |
| 1219 | seed3 >>= sh1; | 1220 | seed3 = static_cast<uint8_t>(seed3 >> sh1); |
| 1220 | seed4 >>= sh2; | 1221 | seed4 = static_cast<uint8_t>(seed4 >> sh2); |
| 1221 | seed5 >>= sh1; | 1222 | seed5 = static_cast<uint8_t>(seed5 >> sh1); |
| 1222 | seed6 >>= sh2; | 1223 | seed6 = static_cast<uint8_t>(seed6 >> sh2); |
| 1223 | seed7 >>= sh1; | 1224 | seed7 = static_cast<uint8_t>(seed7 >> sh1); |
| 1224 | seed8 >>= sh2; | 1225 | seed8 = static_cast<uint8_t>(seed8 >> sh2); |
| 1225 | seed9 >>= sh3; | 1226 | seed9 = static_cast<uint8_t>(seed9 >> sh3); |
| 1226 | seed10 >>= sh3; | 1227 | seed10 = static_cast<uint8_t>(seed10 >> sh3); |
| 1227 | seed11 >>= sh3; | 1228 | seed11 = static_cast<uint8_t>(seed11 >> sh3); |
| 1228 | seed12 >>= sh3; | 1229 | seed12 = static_cast<uint8_t>(seed12 >> sh3); |
| 1229 | 1230 | ||
| 1230 | int32_t a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); | 1231 | int32_t a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); |
| 1231 | int32_t b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); | 1232 | int32_t b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); |
| @@ -1558,7 +1559,9 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1558 | 1559 | ||
| 1559 | // Make sure that higher non-texel bits are set to zero | 1560 | // Make sure that higher non-texel bits are set to zero |
| 1560 | const uint32_t clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; | 1561 | const uint32_t clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; |
| 1561 | texelWeightData[clearByteStart - 1] &= (1 << (weightParams.GetPackedBitSize() % 8)) - 1; | 1562 | texelWeightData[clearByteStart - 1] = |
| 1563 | texelWeightData[clearByteStart - 1] & | ||
| 1564 | static_cast<uint8_t>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); | ||
| 1562 | memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); | 1565 | memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); |
| 1563 | 1566 | ||
| 1564 | std::vector<IntegerEncodedValue> texelWeightValues; | 1567 | std::vector<IntegerEncodedValue> texelWeightValues; |
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 27c8ce975..8e82c6748 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h | |||
| @@ -342,13 +342,14 @@ struct TSCEntry { | |||
| 342 | float GetLodBias() const { | 342 | float GetLodBias() const { |
| 343 | // Sign extend the 13-bit value. | 343 | // Sign extend the 13-bit value. |
| 344 | constexpr u32 mask = 1U << (13 - 1); | 344 | constexpr u32 mask = 1U << (13 - 1); |
| 345 | return static_cast<s32>((mip_lod_bias ^ mask) - mask) / 256.0f; | 345 | return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f; |
| 346 | } | 346 | } |
| 347 | 347 | ||
| 348 | std::array<float, 4> GetBorderColor() const { | 348 | std::array<float, 4> GetBorderColor() const { |
| 349 | if (srgb_conversion) { | 349 | if (srgb_conversion) { |
| 350 | return {srgb_border_color_r / 255.0f, srgb_border_color_g / 255.0f, | 350 | return {static_cast<float>(srgb_border_color_r) / 255.0f, |
| 351 | srgb_border_color_b / 255.0f, border_color[3]}; | 351 | static_cast<float>(srgb_border_color_g) / 255.0f, |
| 352 | static_cast<float>(srgb_border_color_b) / 255.0f, border_color[3]}; | ||
| 352 | } | 353 | } |
| 353 | return border_color; | 354 | return border_color; |
| 354 | } | 355 | } |
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 60cda0ca3..8e947394c 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp | |||
| @@ -28,7 +28,7 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system) { | |||
| 28 | 28 | ||
| 29 | u16 GetResolutionScaleFactor(const RendererBase& renderer) { | 29 | u16 GetResolutionScaleFactor(const RendererBase& renderer) { |
| 30 | return static_cast<u16>( | 30 | return static_cast<u16>( |
| 31 | Settings::values.resolution_factor | 31 | Settings::values.resolution_factor != 0 |
| 32 | ? Settings::values.resolution_factor | 32 | ? Settings::values.resolution_factor |
| 33 | : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio()); | 33 | : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio()); |
| 34 | } | 34 | } |