Diffstat (limited to 'src')
| -rw-r--r-- | src/common/bit_util.h | 7 |
| -rw-r--r-- | src/common/logging/backend.cpp | 2 |
| -rw-r--r-- | src/common/x64/cpu_detect.cpp | 104 |
| -rw-r--r-- | src/common/x64/cpu_detect.h | 64 |
| -rw-r--r-- | src/core/core.cpp | 4 |
| -rw-r--r-- | src/core/frontend/emu_window.h | 11 |
| -rw-r--r-- | src/core/hle/kernel/k_page_table.cpp | 159 |
| -rw-r--r-- | src/core/hle/kernel/k_page_table.h | 12 |
| -rw-r--r-- | src/core/hle/service/ldr/ldr.cpp | 78 |
| -rw-r--r-- | src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp | 580 |
| -rw-r--r-- | src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py | 92 |
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 20 |
| -rw-r--r-- | src/video_core/engines/maxwell_dma.h | 2 |
| -rw-r--r-- | src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 46 |
| -rw-r--r-- | src/video_core/renderer_opengl/gl_graphics_pipeline.h | 7 |
| -rw-r--r-- | src/video_core/video_core.cpp | 1 |
16 files changed, 960 insertions, 229 deletions
diff --git a/src/common/bit_util.h b/src/common/bit_util.h index f50d3308a..f37538e06 100644 --- a/src/common/bit_util.h +++ b/src/common/bit_util.h | |||
| @@ -57,4 +57,11 @@ requires std::is_integral_v<T> | |||
| 57 | return static_cast<T>(1ULL << ((8U * sizeof(T)) - std::countl_zero(value - 1U))); | 57 | return static_cast<T>(1ULL << ((8U * sizeof(T)) - std::countl_zero(value - 1U))); |
| 58 | } | 58 | } |
| 59 | 59 | ||
| 60 | template <size_t bit_index, typename T> | ||
| 61 | requires std::is_integral_v<T> | ||
| 62 | [[nodiscard]] constexpr bool Bit(const T value) { | ||
| 63 | static_assert(bit_index < BitSize<T>(), "bit_index must be smaller than size of T"); | ||
| 64 | return ((value >> bit_index) & T(1)) == T(1); | ||
| 65 | } | ||
| 66 | |||
| 60 | } // namespace Common | 67 | } // namespace Common |
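The new Common::Bit<index>() helper above is a constexpr single-bit test, so CPUID flag extraction (see cpu_detect.cpp below) reads as a named bit index instead of a shift-and-mask. A minimal usage sketch assuming only this header; the ECX value is a made-up example:

    #include <cstdint>
    #include "common/bit_util.h"

    constexpr std::uint32_t example_ecx = 0x0010'0001; // hypothetical CPUID.01H:ECX value

    static_assert(Common::Bit<0>(example_ecx));   // bit 0 set   (SSE3 flag position)
    static_assert(!Common::Bit<19>(example_ecx)); // bit 19 clear (SSE4.1 flag position)
    static_assert(Common::Bit<20>(example_ecx));  // bit 20 set  (SSE4.2 flag position)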
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index f1c9ed6c4..4a2462ec4 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp | |||
| @@ -276,9 +276,9 @@ private: | |||
| 276 | ColorConsoleBackend color_console_backend{}; | 276 | ColorConsoleBackend color_console_backend{}; |
| 277 | FileBackend file_backend; | 277 | FileBackend file_backend; |
| 278 | 278 | ||
| 279 | std::jthread backend_thread; | ||
| 280 | MPSCQueue<Entry, true> message_queue{}; | 279 | MPSCQueue<Entry, true> message_queue{}; |
| 281 | std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()}; | 280 | std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()}; |
| 281 | std::jthread backend_thread; | ||
| 282 | }; | 282 | }; |
| 283 | } // namespace | 283 | } // namespace |
| 284 | 284 | ||
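The only change to backend.cpp is declaration order. Non-static members are destroyed in reverse declaration order, so declaring backend_thread last makes the std::jthread join first, before the message_queue and time_origin it reads are torn down. A standalone sketch of the idiom (member names mirror the diff; the queue type is simplified):

    #include <queue>
    #include <thread>

    struct Backend {
        std::queue<int> message_queue; // declared first -> destroyed last
        std::jthread backend_thread;   // declared last  -> destroyed first (joins on destruction)
        // Destruction runs bottom-up, so the worker thread has stopped before
        // message_queue goes away and can never drain a dead queue.
    };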
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index fbeacc7e2..99d87f586 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp | |||
| @@ -1,8 +1,12 @@ | |||
| 1 | // Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project | 1 | // Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project / 2022 Yuzu Emulator |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Project Licensed under GPLv2 or any later version Refer to the license.txt file included. |
| 3 | // Refer to the license.txt file included. | ||
| 4 | 3 | ||
| 4 | #include <array> | ||
| 5 | #include <cstring> | 5 | #include <cstring> |
| 6 | #include <iterator> | ||
| 7 | #include <span> | ||
| 8 | #include <string_view> | ||
| 9 | #include "common/bit_util.h" | ||
| 6 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 7 | #include "common/x64/cpu_detect.h" | 11 | #include "common/x64/cpu_detect.h" |
| 8 | 12 | ||
| @@ -17,7 +21,7 @@ | |||
| 17 | // clang-format on | 21 | // clang-format on |
| 18 | #endif | 22 | #endif |
| 19 | 23 | ||
| 20 | static inline void __cpuidex(int info[4], int function_id, int subfunction_id) { | 24 | static inline void __cpuidex(int info[4], u32 function_id, u32 subfunction_id) { |
| 21 | #if defined(__DragonFly__) || defined(__FreeBSD__) | 25 | #if defined(__DragonFly__) || defined(__FreeBSD__) |
| 22 | // Despite the name, this is just do_cpuid() with ECX as second input. | 26 | // Despite the name, this is just do_cpuid() with ECX as second input. |
| 23 | cpuid_count((u_int)function_id, (u_int)subfunction_id, (u_int*)info); | 27 | cpuid_count((u_int)function_id, (u_int)subfunction_id, (u_int*)info); |
| @@ -30,7 +34,7 @@ static inline void __cpuidex(int info[4], int function_id, int subfunction_id) { | |||
| 30 | #endif | 34 | #endif |
| 31 | } | 35 | } |
| 32 | 36 | ||
| 33 | static inline void __cpuid(int info[4], int function_id) { | 37 | static inline void __cpuid(int info[4], u32 function_id) { |
| 34 | return __cpuidex(info, function_id, 0); | 38 | return __cpuidex(info, function_id, 0); |
| 35 | } | 39 | } |
| 36 | 40 | ||
| @@ -45,6 +49,17 @@ static inline u64 _xgetbv(u32 index) { | |||
| 45 | 49 | ||
| 46 | namespace Common { | 50 | namespace Common { |
| 47 | 51 | ||
| 52 | CPUCaps::Manufacturer CPUCaps::ParseManufacturer(std::string_view brand_string) { | ||
| 53 | if (brand_string == "GenuineIntel") { | ||
| 54 | return Manufacturer::Intel; | ||
| 55 | } else if (brand_string == "AuthenticAMD") { | ||
| 56 | return Manufacturer::AMD; | ||
| 57 | } else if (brand_string == "HygonGenuine") { | ||
| 58 | return Manufacturer::Hygon; | ||
| 59 | } | ||
| 60 | return Manufacturer::Unknown; | ||
| 61 | } | ||
| 62 | |||
| 48 | // Detects the various CPU features | 63 | // Detects the various CPU features |
| 49 | static CPUCaps Detect() { | 64 | static CPUCaps Detect() { |
| 50 | CPUCaps caps = {}; | 65 | CPUCaps caps = {}; |
| @@ -53,57 +68,44 @@ static CPUCaps Detect() { | |||
| 53 | // yuzu at all anyway | 68 | // yuzu at all anyway |
| 54 | 69 | ||
| 55 | int cpu_id[4]; | 70 | int cpu_id[4]; |
| 56 | memset(caps.brand_string, 0, sizeof(caps.brand_string)); | ||
| 57 | 71 | ||
| 58 | // Detect CPU's CPUID capabilities and grab CPU string | 72 | // Detect CPU's CPUID capabilities and grab manufacturer string |
| 59 | __cpuid(cpu_id, 0x00000000); | 73 | __cpuid(cpu_id, 0x00000000); |
| 60 | u32 max_std_fn = cpu_id[0]; // EAX | 74 | const u32 max_std_fn = cpu_id[0]; // EAX |
| 61 | |||
| 62 | std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int)); | ||
| 63 | std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int)); | ||
| 64 | std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int)); | ||
| 65 | if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69) | ||
| 66 | caps.manufacturer = Manufacturer::Intel; | ||
| 67 | else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65) | ||
| 68 | caps.manufacturer = Manufacturer::AMD; | ||
| 69 | else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e) | ||
| 70 | caps.manufacturer = Manufacturer::Hygon; | ||
| 71 | else | ||
| 72 | caps.manufacturer = Manufacturer::Unknown; | ||
| 73 | 75 | ||
| 74 | __cpuid(cpu_id, 0x80000000); | 76 | std::memset(caps.brand_string, 0, std::size(caps.brand_string)); |
| 77 | std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(u32)); | ||
| 78 | std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(u32)); | ||
| 79 | std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(u32)); | ||
| 75 | 80 | ||
| 76 | u32 max_ex_fn = cpu_id[0]; | 81 | caps.manufacturer = CPUCaps::ParseManufacturer(caps.brand_string); |
| 77 | 82 | ||
| 78 | // Set reasonable default brand string even if brand string not available | 83 | // Set reasonable default cpu string even if brand string not available |
| 79 | strcpy(caps.cpu_string, caps.brand_string); | 84 | std::strncpy(caps.cpu_string, caps.brand_string, std::size(caps.brand_string)); |
| 85 | |||
| 86 | __cpuid(cpu_id, 0x80000000); | ||
| 87 | |||
| 88 | const u32 max_ex_fn = cpu_id[0]; | ||
| 80 | 89 | ||
| 81 | // Detect family and other miscellaneous features | 90 | // Detect family and other miscellaneous features |
| 82 | if (max_std_fn >= 1) { | 91 | if (max_std_fn >= 1) { |
| 83 | __cpuid(cpu_id, 0x00000001); | 92 | __cpuid(cpu_id, 0x00000001); |
| 84 | if ((cpu_id[3] >> 25) & 1) | 93 | caps.sse = Common::Bit<25>(cpu_id[3]); |
| 85 | caps.sse = true; | 94 | caps.sse2 = Common::Bit<26>(cpu_id[3]); |
| 86 | if ((cpu_id[3] >> 26) & 1) | 95 | caps.sse3 = Common::Bit<0>(cpu_id[2]); |
| 87 | caps.sse2 = true; | 96 | caps.ssse3 = Common::Bit<9>(cpu_id[2]); |
| 88 | if ((cpu_id[2]) & 1) | 97 | caps.sse4_1 = Common::Bit<19>(cpu_id[2]); |
| 89 | caps.sse3 = true; | 98 | caps.sse4_2 = Common::Bit<20>(cpu_id[2]); |
| 90 | if ((cpu_id[2] >> 9) & 1) | 99 | caps.aes = Common::Bit<25>(cpu_id[2]); |
| 91 | caps.ssse3 = true; | ||
| 92 | if ((cpu_id[2] >> 19) & 1) | ||
| 93 | caps.sse4_1 = true; | ||
| 94 | if ((cpu_id[2] >> 20) & 1) | ||
| 95 | caps.sse4_2 = true; | ||
| 96 | if ((cpu_id[2] >> 25) & 1) | ||
| 97 | caps.aes = true; | ||
| 98 | 100 | ||
| 99 | // AVX support requires 3 separate checks: | 101 | // AVX support requires 3 separate checks: |
| 100 | // - Is the AVX bit set in CPUID? | 102 | // - Is the AVX bit set in CPUID? |
| 101 | // - Is the XSAVE bit set in CPUID? | 103 | // - Is the XSAVE bit set in CPUID? |
| 102 | // - XGETBV result has the XCR bit set. | 104 | // - XGETBV result has the XCR bit set. |
| 103 | if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1)) { | 105 | if (Common::Bit<28>(cpu_id[2]) && Common::Bit<27>(cpu_id[2])) { |
| 104 | if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) { | 106 | if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) { |
| 105 | caps.avx = true; | 107 | caps.avx = true; |
| 106 | if ((cpu_id[2] >> 12) & 1) | 108 | if (Common::Bit<12>(cpu_id[2])) |
| 107 | caps.fma = true; | 109 | caps.fma = true; |
| 108 | } | 110 | } |
| 109 | } | 111 | } |
| @@ -111,15 +113,13 @@ static CPUCaps Detect() { | |||
| 111 | if (max_std_fn >= 7) { | 113 | if (max_std_fn >= 7) { |
| 112 | __cpuidex(cpu_id, 0x00000007, 0x00000000); | 114 | __cpuidex(cpu_id, 0x00000007, 0x00000000); |
| 113 | // Can't enable AVX2 unless the XSAVE/XGETBV checks above passed | 115 | // Can't enable AVX2 unless the XSAVE/XGETBV checks above passed |
| 114 | if ((cpu_id[1] >> 5) & 1) | 116 | caps.avx2 = caps.avx && Common::Bit<5>(cpu_id[1]); |
| 115 | caps.avx2 = caps.avx; | 117 | caps.bmi1 = Common::Bit<3>(cpu_id[1]); |
| 116 | if ((cpu_id[1] >> 3) & 1) | 118 | caps.bmi2 = Common::Bit<8>(cpu_id[1]); |
| 117 | caps.bmi1 = true; | ||
| 118 | if ((cpu_id[1] >> 8) & 1) | ||
| 119 | caps.bmi2 = true; | ||
| 120 | // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP) | 119 | // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP) |
| 121 | if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 && | 120 | if (Common::Bit<16>(cpu_id[1]) && Common::Bit<28>(cpu_id[1]) && |
| 122 | (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) { | 121 | Common::Bit<31>(cpu_id[1]) && Common::Bit<17>(cpu_id[1]) && |
| 122 | Common::Bit<30>(cpu_id[1])) { | ||
| 123 | caps.avx512 = caps.avx2; | 123 | caps.avx512 = caps.avx2; |
| 124 | } | 124 | } |
| 125 | } | 125 | } |
| @@ -138,15 +138,13 @@ static CPUCaps Detect() { | |||
| 138 | if (max_ex_fn >= 0x80000001) { | 138 | if (max_ex_fn >= 0x80000001) { |
| 139 | // Check for more features | 139 | // Check for more features |
| 140 | __cpuid(cpu_id, 0x80000001); | 140 | __cpuid(cpu_id, 0x80000001); |
| 141 | if ((cpu_id[2] >> 16) & 1) | 141 | caps.lzcnt = Common::Bit<5>(cpu_id[2]); |
| 142 | caps.fma4 = true; | 142 | caps.fma4 = Common::Bit<16>(cpu_id[2]); |
| 143 | } | 143 | } |
| 144 | 144 | ||
| 145 | if (max_ex_fn >= 0x80000007) { | 145 | if (max_ex_fn >= 0x80000007) { |
| 146 | __cpuid(cpu_id, 0x80000007); | 146 | __cpuid(cpu_id, 0x80000007); |
| 147 | if (cpu_id[3] & (1 << 8)) { | 147 | caps.invariant_tsc = Common::Bit<8>(cpu_id[3]); |
| 148 | caps.invariant_tsc = true; | ||
| 149 | } | ||
| 150 | } | 148 | } |
| 151 | 149 | ||
| 152 | if (max_std_fn >= 0x16) { | 150 | if (max_std_fn >= 0x16) { |
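CPUID leaf 0 returns the 12-byte vendor string split across EBX, EDX and ECX, in that order, which is why the code above copies cpu_id[1], cpu_id[3] and cpu_id[2] into brand_string before handing it to ParseManufacturer. A simplified, self-contained sketch of that assembly and lookup (not the exact yuzu helper; the register values are a worked Intel example):

    #include <cstring>
    #include <string_view>

    enum class Manufacturer { Unknown, Intel, AMD, Hygon };

    Manufacturer ParseManufacturer(std::string_view brand) {
        if (brand == "GenuineIntel") return Manufacturer::Intel;
        if (brand == "AuthenticAMD") return Manufacturer::AMD;
        if (brand == "HygonGenuine") return Manufacturer::Hygon;
        return Manufacturer::Unknown;
    }

    Manufacturer DetectVendor(const int cpu_id[4]) {
        char brand[13]{};                      // 12 vendor bytes + NUL terminator
        std::memcpy(&brand[0], &cpu_id[1], 4); // EBX -> "Genu"
        std::memcpy(&brand[4], &cpu_id[3], 4); // EDX -> "ineI"
        std::memcpy(&brand[8], &cpu_id[2], 4); // ECX -> "ntel"
        return ParseManufacturer(brand);       // -> Manufacturer::Intel
    }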
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index e3b63302e..3e6d808f3 100644 --- a/src/common/x64/cpu_detect.h +++ b/src/common/x64/cpu_detect.h | |||
| @@ -1,42 +1,50 @@ | |||
| 1 | // Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project | 1 | // Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project / 2022 Yuzu Emulator |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Project Licensed under GPLv2 or any later version Refer to the license.txt file included.
| 3 | // Refer to the license.txt file included. | ||
| 4 | 3 | ||
| 5 | #pragma once | 4 | #pragma once |
| 6 | 5 | ||
| 7 | namespace Common { | 6 | #include <string_view> |
| 7 | #include "common/common_types.h" | ||
| 8 | 8 | ||
| 9 | enum class Manufacturer : u32 { | 9 | namespace Common { |
| 10 | Intel = 0, | ||
| 11 | AMD = 1, | ||
| 12 | Hygon = 2, | ||
| 13 | Unknown = 3, | ||
| 14 | }; | ||
| 15 | 10 | ||
| 16 | /// x86/x64 CPU capabilities that may be detected by this module | 11 | /// x86/x64 CPU capabilities that may be detected by this module |
| 17 | struct CPUCaps { | 12 | struct CPUCaps { |
| 13 | |||
| 14 | enum class Manufacturer : u8 { | ||
| 15 | Unknown = 0, | ||
| 16 | Intel = 1, | ||
| 17 | AMD = 2, | ||
| 18 | Hygon = 3, | ||
| 19 | }; | ||
| 20 | |||
| 21 | static Manufacturer ParseManufacturer(std::string_view brand_string); | ||
| 22 | |||
| 18 | Manufacturer manufacturer; | 23 | Manufacturer manufacturer; |
| 19 | char cpu_string[0x21]; | 24 | char brand_string[13]; |
| 20 | char brand_string[0x41]; | 25 | |
| 21 | bool sse; | 26 | char cpu_string[48]; |
| 22 | bool sse2; | 27 | |
| 23 | bool sse3; | ||
| 24 | bool ssse3; | ||
| 25 | bool sse4_1; | ||
| 26 | bool sse4_2; | ||
| 27 | bool lzcnt; | ||
| 28 | bool avx; | ||
| 29 | bool avx2; | ||
| 30 | bool avx512; | ||
| 31 | bool bmi1; | ||
| 32 | bool bmi2; | ||
| 33 | bool fma; | ||
| 34 | bool fma4; | ||
| 35 | bool aes; | ||
| 36 | bool invariant_tsc; | ||
| 37 | u32 base_frequency; | 28 | u32 base_frequency; |
| 38 | u32 max_frequency; | 29 | u32 max_frequency; |
| 39 | u32 bus_frequency; | 30 | u32 bus_frequency; |
| 31 | |||
| 32 | bool sse : 1; | ||
| 33 | bool sse2 : 1; | ||
| 34 | bool sse3 : 1; | ||
| 35 | bool ssse3 : 1; | ||
| 36 | bool sse4_1 : 1; | ||
| 37 | bool sse4_2 : 1; | ||
| 38 | bool lzcnt : 1; | ||
| 39 | bool avx : 1; | ||
| 40 | bool avx2 : 1; | ||
| 41 | bool avx512 : 1; | ||
| 42 | bool bmi1 : 1; | ||
| 43 | bool bmi2 : 1; | ||
| 44 | bool fma : 1; | ||
| 45 | bool fma4 : 1; | ||
| 46 | bool aes : 1; | ||
| 47 | bool invariant_tsc : 1; | ||
| 40 | }; | 48 | }; |
| 41 | 49 | ||
| 42 | /** | 50 | /** |
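The reworked struct stores the vendor string in 13 bytes (12 from CPUID plus a terminator), the processor brand in 48, and each feature flag as a one-bit bitfield, so the caps object shrinks while call sites stay unchanged. A hedged usage sketch, assuming the accessor this header already declares is Common::GetCPUCaps():

    #include "common/x64/cpu_detect.h"

    // Hypothetical feature gate; the bitfield members still read like plain bools.
    bool CanUseFmaPath() {
        const auto& caps = Common::GetCPUCaps();
        return caps.avx2 && caps.fma;
    }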
diff --git a/src/core/core.cpp b/src/core/core.cpp index b0cfee3ee..c60a784c3 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp | |||
| @@ -326,7 +326,9 @@ struct System::Impl { | |||
| 326 | is_powered_on = false; | 326 | is_powered_on = false; |
| 327 | exit_lock = false; | 327 | exit_lock = false; |
| 328 | 328 | ||
| 329 | gpu_core->NotifyShutdown(); | 329 | if (gpu_core != nullptr) { |
| 330 | gpu_core->NotifyShutdown(); | ||
| 331 | } | ||
| 330 | 332 | ||
| 331 | services.reset(); | 333 | services.reset(); |
| 332 | service_manager.reset(); | 334 | service_manager.reset(); |
diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h index e413a520a..b3bffecb2 100644 --- a/src/core/frontend/emu_window.h +++ b/src/core/frontend/emu_window.h | |||
| @@ -42,11 +42,20 @@ public: | |||
| 42 | context.MakeCurrent(); | 42 | context.MakeCurrent(); |
| 43 | } | 43 | } |
| 44 | ~Scoped() { | 44 | ~Scoped() { |
| 45 | context.DoneCurrent(); | 45 | if (active) { |
| 46 | context.DoneCurrent(); | ||
| 47 | } | ||
| 48 | } | ||
| 49 | |||
| 50 | /// In the event that context was destroyed before the Scoped is destroyed, this provides a | ||
| 51 | /// mechanism to prevent calling a destroyed object's method during the destructor | ||
| 52 | void Cancel() { | ||
| 53 | active = false; | ||
| 46 | } | 54 | } |
| 47 | 55 | ||
| 48 | private: | 56 | private: |
| 49 | GraphicsContext& context; | 57 | GraphicsContext& context; |
| 58 | bool active{true}; | ||
| 50 | }; | 59 | }; |
| 51 | 60 | ||
| 52 | /// Calls MakeCurrent on the context and calls DoneCurrent when the scope for the returned value | 61 | /// Calls MakeCurrent on the context and calls DoneCurrent when the scope for the returned value |
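Cancel() lets a caller defuse the RAII helper when the GraphicsContext will be destroyed before the Scoped is, so the Scoped destructor skips DoneCurrent() on a dead object. A minimal usage sketch, assuming the Acquire() helper documented in the surrounding header; the shutdown flag is illustrative:

    void RenderFrame(Core::Frontend::GraphicsContext& context, bool context_is_dying) {
        auto scope = context.Acquire(); // MakeCurrent() now, DoneCurrent() at scope exit
        // ... submit rendering work ...
        if (context_is_dying) {
            scope.Cancel();             // context dies before this scope ends: skip DoneCurrent()
        }
    }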
diff --git a/src/core/hle/kernel/k_page_table.cpp b/src/core/hle/kernel/k_page_table.cpp index dfea0b6e2..0602de1f7 100644 --- a/src/core/hle/kernel/k_page_table.cpp +++ b/src/core/hle/kernel/k_page_table.cpp | |||
| @@ -285,72 +285,141 @@ ResultCode KPageTable::MapProcessCode(VAddr addr, std::size_t num_pages, KMemory | |||
| 285 | return ResultSuccess; | 285 | return ResultSuccess; |
| 286 | } | 286 | } |
| 287 | 287 | ||
| 288 | ResultCode KPageTable::MapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) { | 288 | ResultCode KPageTable::MapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size) { |
| 289 | // Validate the mapping request. | ||
| 290 | R_UNLESS(this->CanContain(dst_address, size, KMemoryState::AliasCode), | ||
| 291 | ResultInvalidMemoryRegion); | ||
| 292 | |||
| 293 | // Lock the table. | ||
| 289 | KScopedLightLock lk(general_lock); | 294 | KScopedLightLock lk(general_lock); |
| 290 | 295 | ||
| 291 | const std::size_t num_pages{size / PageSize}; | 296 | // Verify that the source memory is normal heap. |
| 297 | KMemoryState src_state{}; | ||
| 298 | KMemoryPermission src_perm{}; | ||
| 299 | std::size_t num_src_allocator_blocks{}; | ||
| 300 | R_TRY(this->CheckMemoryState(&src_state, &src_perm, nullptr, &num_src_allocator_blocks, | ||
| 301 | src_address, size, KMemoryState::All, KMemoryState::Normal, | ||
| 302 | KMemoryPermission::All, KMemoryPermission::UserReadWrite, | ||
| 303 | KMemoryAttribute::All, KMemoryAttribute::None)); | ||
| 292 | 304 | ||
| 293 | KMemoryState state{}; | 305 | // Verify that the destination memory is unmapped. |
| 294 | KMemoryPermission perm{}; | 306 | std::size_t num_dst_allocator_blocks{}; |
| 295 | CASCADE_CODE(CheckMemoryState(&state, &perm, nullptr, nullptr, src_addr, size, | 307 | R_TRY(this->CheckMemoryState(&num_dst_allocator_blocks, dst_address, size, KMemoryState::All, |
| 296 | KMemoryState::All, KMemoryState::Normal, KMemoryPermission::All, | 308 | KMemoryState::Free, KMemoryPermission::None, |
| 297 | KMemoryPermission::UserReadWrite, KMemoryAttribute::Mask, | 309 | KMemoryPermission::None, KMemoryAttribute::None, |
| 298 | KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped)); | 310 | KMemoryAttribute::None)); |
| 299 | 311 | ||
| 300 | if (IsRegionMapped(dst_addr, size)) { | 312 | // Map the code memory. |
| 301 | return ResultInvalidCurrentMemory; | 313 | { |
| 302 | } | 314 | // Determine the number of pages being operated on. |
| 315 | const std::size_t num_pages = size / PageSize; | ||
| 303 | 316 | ||
| 304 | KPageLinkedList page_linked_list; | 317 | // Create page groups for the memory being mapped. |
| 305 | AddRegionToPages(src_addr, num_pages, page_linked_list); | 318 | KPageLinkedList pg; |
| 319 | AddRegionToPages(src_address, num_pages, pg); | ||
| 306 | 320 | ||
| 307 | { | 321 | // Reprotect the source as kernel-read/not mapped. |
| 308 | auto block_guard = detail::ScopeExit( | 322 | const auto new_perm = static_cast<KMemoryPermission>(KMemoryPermission::KernelRead | |
| 309 | [&] { Operate(src_addr, num_pages, perm, OperationType::ChangePermissions); }); | 323 | KMemoryPermission::NotMapped); |
| 324 | R_TRY(Operate(src_address, num_pages, new_perm, OperationType::ChangePermissions)); | ||
| 310 | 325 | ||
| 311 | CASCADE_CODE(Operate(src_addr, num_pages, KMemoryPermission::None, | 326 | // Ensure that we unprotect the source pages on failure. |
| 312 | OperationType::ChangePermissions)); | 327 | auto unprot_guard = SCOPE_GUARD({ |
| 313 | CASCADE_CODE(MapPages(dst_addr, page_linked_list, KMemoryPermission::None)); | 328 | ASSERT(this->Operate(src_address, num_pages, src_perm, OperationType::ChangePermissions) |
| 329 | .IsSuccess()); | ||
| 330 | }); | ||
| 314 | 331 | ||
| 315 | block_guard.Cancel(); | 332 | // Map the alias pages. |
| 316 | } | 333 | R_TRY(MapPages(dst_address, pg, new_perm)); |
| 317 | 334 | ||
| 318 | block_manager->Update(src_addr, num_pages, state, KMemoryPermission::None, | 335 | // We successfully mapped the alias pages, so we don't need to unprotect the src pages on |
| 319 | KMemoryAttribute::Locked); | 336 | // failure. |
| 320 | block_manager->Update(dst_addr, num_pages, KMemoryState::AliasCode); | 337 | unprot_guard.Cancel(); |
| 338 | |||
| 339 | // Apply the memory block updates. | ||
| 340 | block_manager->Update(src_address, num_pages, src_state, new_perm, | ||
| 341 | KMemoryAttribute::Locked); | ||
| 342 | block_manager->Update(dst_address, num_pages, KMemoryState::AliasCode, new_perm, | ||
| 343 | KMemoryAttribute::None); | ||
| 344 | } | ||
| 321 | 345 | ||
| 322 | return ResultSuccess; | 346 | return ResultSuccess; |
| 323 | } | 347 | } |
| 324 | 348 | ||
| 325 | ResultCode KPageTable::UnmapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) { | 349 | ResultCode KPageTable::UnmapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size) { |
| 350 | // Validate the mapping request. | ||
| 351 | R_UNLESS(this->CanContain(dst_address, size, KMemoryState::AliasCode), | ||
| 352 | ResultInvalidMemoryRegion); | ||
| 353 | |||
| 354 | // Lock the table. | ||
| 326 | KScopedLightLock lk(general_lock); | 355 | KScopedLightLock lk(general_lock); |
| 327 | 356 | ||
| 328 | if (!size) { | 357 | // Verify that the source memory is locked normal heap. |
| 329 | return ResultSuccess; | 358 | std::size_t num_src_allocator_blocks{}; |
| 359 | R_TRY(this->CheckMemoryState(std::addressof(num_src_allocator_blocks), src_address, size, | ||
| 360 | KMemoryState::All, KMemoryState::Normal, KMemoryPermission::None, | ||
| 361 | KMemoryPermission::None, KMemoryAttribute::All, | ||
| 362 | KMemoryAttribute::Locked)); | ||
| 363 | |||
| 364 | // Verify that the destination memory is aliasable code. | ||
| 365 | std::size_t num_dst_allocator_blocks{}; | ||
| 366 | R_TRY(this->CheckMemoryStateContiguous( | ||
| 367 | std::addressof(num_dst_allocator_blocks), dst_address, size, KMemoryState::FlagCanCodeAlias, | ||
| 368 | KMemoryState::FlagCanCodeAlias, KMemoryPermission::None, KMemoryPermission::None, | ||
| 369 | KMemoryAttribute::All, KMemoryAttribute::None)); | ||
| 370 | |||
| 371 | // Determine whether any pages being unmapped are code. | ||
| 372 | bool any_code_pages = false; | ||
| 373 | { | ||
| 374 | KMemoryBlockManager::const_iterator it = block_manager->FindIterator(dst_address); | ||
| 375 | while (true) { | ||
| 376 | // Get the memory info. | ||
| 377 | const KMemoryInfo info = it->GetMemoryInfo(); | ||
| 378 | |||
| 379 | // Check if the memory has code flag. | ||
| 380 | if ((info.GetState() & KMemoryState::FlagCode) != KMemoryState::None) { | ||
| 381 | any_code_pages = true; | ||
| 382 | break; | ||
| 383 | } | ||
| 384 | |||
| 385 | // Check if we're done. | ||
| 386 | if (dst_address + size - 1 <= info.GetLastAddress()) { | ||
| 387 | break; | ||
| 388 | } | ||
| 389 | |||
| 390 | // Advance. | ||
| 391 | ++it; | ||
| 392 | } | ||
| 330 | } | 393 | } |
| 331 | 394 | ||
| 332 | const std::size_t num_pages{size / PageSize}; | 395 | // Ensure that we maintain the instruction cache. |
| 396 | bool reprotected_pages = false; | ||
| 397 | SCOPE_EXIT({ | ||
| 398 | if (reprotected_pages && any_code_pages) { | ||
| 399 | system.InvalidateCpuInstructionCacheRange(dst_address, size); | ||
| 400 | } | ||
| 401 | }); | ||
| 333 | 402 | ||
| 334 | CASCADE_CODE(CheckMemoryState(nullptr, nullptr, nullptr, nullptr, src_addr, size, | 403 | // Unmap. |
| 335 | KMemoryState::All, KMemoryState::Normal, KMemoryPermission::None, | 404 | { |
| 336 | KMemoryPermission::None, KMemoryAttribute::Mask, | 405 | // Determine the number of pages being operated on. |
| 337 | KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped)); | 406 | const std::size_t num_pages = size / PageSize; |
| 338 | 407 | ||
| 339 | KMemoryState state{}; | 408 | // Unmap the aliased copy of the pages. |
| 340 | CASCADE_CODE(CheckMemoryState( | 409 | R_TRY(Operate(dst_address, num_pages, KMemoryPermission::None, OperationType::Unmap)); |
| 341 | &state, nullptr, nullptr, nullptr, dst_addr, PageSize, KMemoryState::FlagCanCodeAlias, | ||
| 342 | KMemoryState::FlagCanCodeAlias, KMemoryPermission::None, KMemoryPermission::None, | ||
| 343 | KMemoryAttribute::Mask, KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped)); | ||
| 344 | CASCADE_CODE(CheckMemoryState(dst_addr, size, KMemoryState::All, state, KMemoryPermission::None, | ||
| 345 | KMemoryPermission::None, KMemoryAttribute::Mask, | ||
| 346 | KMemoryAttribute::None)); | ||
| 347 | CASCADE_CODE(Operate(dst_addr, num_pages, KMemoryPermission::None, OperationType::Unmap)); | ||
| 348 | 410 | ||
| 349 | block_manager->Update(dst_addr, num_pages, KMemoryState::Free); | 411 | // Try to set the permissions for the source pages back to what they should be. |
| 350 | block_manager->Update(src_addr, num_pages, KMemoryState::Normal, | 412 | R_TRY(Operate(src_address, num_pages, KMemoryPermission::UserReadWrite, |
| 351 | KMemoryPermission::UserReadWrite); | 413 | OperationType::ChangePermissions)); |
| 352 | 414 | ||
| 353 | system.InvalidateCpuInstructionCacheRange(dst_addr, size); | 415 | // Apply the memory block updates. |
| 416 | block_manager->Update(dst_address, num_pages, KMemoryState::None); | ||
| 417 | block_manager->Update(src_address, num_pages, KMemoryState::Normal, | ||
| 418 | KMemoryPermission::UserReadWrite); | ||
| 419 | |||
| 420 | // Note that we reprotected pages. | ||
| 421 | reprotected_pages = true; | ||
| 422 | } | ||
| 354 | 423 | ||
| 355 | return ResultSuccess; | 424 | return ResultSuccess; |
| 356 | } | 425 | } |
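The rewritten MapCodeMemory follows a reprotect, guard, map, cancel sequence: lock down the source pages, arm a rollback that restores their permissions, attempt the alias mapping, and cancel the rollback only on success. The sketch below isolates that pattern with a tiny stand-in for yuzu's SCOPE_GUARD macro; the page-table calls are fakes, not the real Operate()/MapPages() signatures:

    #include <cstdio>
    #include <functional>

    // Minimal stand-in for SCOPE_GUARD: runs the rollback unless cancelled.
    class ScopeGuard {
    public:
        explicit ScopeGuard(std::function<void()> rollback) : rollback_{std::move(rollback)} {}
        ~ScopeGuard() {
            if (armed_) {
                rollback_();
            }
        }
        void Cancel() { armed_ = false; }
    private:
        std::function<void()> rollback_;
        bool armed_ = true;
    };

    // Fake page-table operations standing in for the calls made in MapCodeMemory.
    bool ReprotectSource()  { return true; } // Operate(src, ..., ChangePermissions)
    void RestoreSource()    { std::puts("restored source permissions"); }
    bool MapAliasPages()    { return true; } // MapPages(dst, pg, new_perm)
    void UpdateBlockState() {}               // block_manager->Update(...)

    bool MapAlias() {
        if (!ReprotectSource()) { return false; }
        ScopeGuard unprot{[] { RestoreSource(); }}; // undo the reprotect on any failure below
        if (!MapAliasPages()) { return false; }     // guard fires here, source is restored
        unprot.Cancel();                            // success: keep the new protection
        UpdateBlockState();
        return true;
    }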
diff --git a/src/core/hle/kernel/k_page_table.h b/src/core/hle/kernel/k_page_table.h index 194177332..e99abe36a 100644 --- a/src/core/hle/kernel/k_page_table.h +++ b/src/core/hle/kernel/k_page_table.h | |||
| @@ -36,8 +36,8 @@ public: | |||
| 36 | KMemoryManager::Pool pool); | 36 | KMemoryManager::Pool pool); |
| 37 | ResultCode MapProcessCode(VAddr addr, std::size_t pages_count, KMemoryState state, | 37 | ResultCode MapProcessCode(VAddr addr, std::size_t pages_count, KMemoryState state, |
| 38 | KMemoryPermission perm); | 38 | KMemoryPermission perm); |
| 39 | ResultCode MapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size); | 39 | ResultCode MapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size); |
| 40 | ResultCode UnmapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size); | 40 | ResultCode UnmapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size); |
| 41 | ResultCode UnmapProcessMemory(VAddr dst_addr, std::size_t size, KPageTable& src_page_table, | 41 | ResultCode UnmapProcessMemory(VAddr dst_addr, std::size_t size, KPageTable& src_page_table, |
| 42 | VAddr src_addr); | 42 | VAddr src_addr); |
| 43 | ResultCode MapPhysicalMemory(VAddr addr, std::size_t size); | 43 | ResultCode MapPhysicalMemory(VAddr addr, std::size_t size); |
| @@ -253,7 +253,9 @@ public: | |||
| 253 | constexpr bool IsInsideASLRRegion(VAddr address, std::size_t size) const { | 253 | constexpr bool IsInsideASLRRegion(VAddr address, std::size_t size) const { |
| 254 | return !IsOutsideASLRRegion(address, size); | 254 | return !IsOutsideASLRRegion(address, size); |
| 255 | } | 255 | } |
| 256 | 256 | constexpr std::size_t GetNumGuardPages() const { | |
| 257 | return IsKernel() ? 1 : 4; | ||
| 258 | } | ||
| 257 | PAddr GetPhysicalAddr(VAddr addr) const { | 259 | PAddr GetPhysicalAddr(VAddr addr) const { |
| 258 | const auto backing_addr = page_table_impl.backing_addr[addr >> PageBits]; | 260 | const auto backing_addr = page_table_impl.backing_addr[addr >> PageBits]; |
| 259 | ASSERT(backing_addr); | 261 | ASSERT(backing_addr); |
| @@ -275,10 +277,6 @@ private: | |||
| 275 | return is_aslr_enabled; | 277 | return is_aslr_enabled; |
| 276 | } | 278 | } |
| 277 | 279 | ||
| 278 | constexpr std::size_t GetNumGuardPages() const { | ||
| 279 | return IsKernel() ? 1 : 4; | ||
| 280 | } | ||
| 281 | |||
| 282 | constexpr bool ContainsPages(VAddr addr, std::size_t num_pages) const { | 280 | constexpr bool ContainsPages(VAddr addr, std::size_t num_pages) const { |
| 283 | return (address_space_start <= addr) && | 281 | return (address_space_start <= addr) && |
| 284 | (num_pages <= (address_space_end - address_space_start) / PageSize) && | 282 | (num_pages <= (address_space_end - address_space_start) / PageSize) && |
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp index 9fc7bb1b1..099276420 100644 --- a/src/core/hle/service/ldr/ldr.cpp +++ b/src/core/hle/service/ldr/ldr.cpp | |||
| @@ -288,7 +288,7 @@ public: | |||
| 288 | } | 288 | } |
| 289 | 289 | ||
| 290 | bool ValidateRegionForMap(Kernel::KPageTable& page_table, VAddr start, std::size_t size) const { | 290 | bool ValidateRegionForMap(Kernel::KPageTable& page_table, VAddr start, std::size_t size) const { |
| 291 | constexpr std::size_t padding_size{4 * Kernel::PageSize}; | 291 | const std::size_t padding_size{page_table.GetNumGuardPages() * Kernel::PageSize}; |
| 292 | const auto start_info{page_table.QueryInfo(start - 1)}; | 292 | const auto start_info{page_table.QueryInfo(start - 1)}; |
| 293 | 293 | ||
| 294 | if (start_info.state != Kernel::KMemoryState::Free) { | 294 | if (start_info.state != Kernel::KMemoryState::Free) { |
| @@ -308,31 +308,69 @@ public: | |||
| 308 | return (start + size + padding_size) <= (end_info.GetAddress() + end_info.GetSize()); | 308 | return (start + size + padding_size) <= (end_info.GetAddress() + end_info.GetSize()); |
| 309 | } | 309 | } |
| 310 | 310 | ||
| 311 | VAddr GetRandomMapRegion(const Kernel::KPageTable& page_table, std::size_t size) const { | 311 | ResultCode GetAvailableMapRegion(Kernel::KPageTable& page_table, u64 size, VAddr& out_addr) { |
| 312 | VAddr addr{}; | 312 | size = Common::AlignUp(size, Kernel::PageSize); |
| 313 | const std::size_t end_pages{(page_table.GetAliasCodeRegionSize() - size) >> | 313 | size += page_table.GetNumGuardPages() * Kernel::PageSize * 4; |
| 314 | Kernel::PageBits}; | 314 | |
| 315 | do { | 315 | const auto is_region_available = [&](VAddr addr) { |
| 316 | addr = page_table.GetAliasCodeRegionStart() + | 316 | const auto end_addr = addr + size; |
| 317 | (Kernel::KSystemControl::GenerateRandomRange(0, end_pages) << Kernel::PageBits); | 317 | while (addr < end_addr) { |
| 318 | } while (!page_table.IsInsideAddressSpace(addr, size) || | 318 | if (system.Memory().IsValidVirtualAddress(addr)) { |
| 319 | page_table.IsInsideHeapRegion(addr, size) || | 319 | return false; |
| 320 | page_table.IsInsideAliasRegion(addr, size)); | 320 | } |
| 321 | return addr; | 321 | |
| 322 | if (!page_table.IsInsideAddressSpace(out_addr, size)) { | ||
| 323 | return false; | ||
| 324 | } | ||
| 325 | |||
| 326 | if (page_table.IsInsideHeapRegion(out_addr, size)) { | ||
| 327 | return false; | ||
| 328 | } | ||
| 329 | |||
| 330 | if (page_table.IsInsideAliasRegion(out_addr, size)) { | ||
| 331 | return false; | ||
| 332 | } | ||
| 333 | |||
| 334 | addr += Kernel::PageSize; | ||
| 335 | } | ||
| 336 | return true; | ||
| 337 | }; | ||
| 338 | |||
| 339 | bool succeeded = false; | ||
| 340 | const auto map_region_end = | ||
| 341 | page_table.GetAliasCodeRegionStart() + page_table.GetAliasCodeRegionSize(); | ||
| 342 | while (current_map_addr < map_region_end) { | ||
| 343 | if (is_region_available(current_map_addr)) { | ||
| 344 | succeeded = true; | ||
| 345 | break; | ||
| 346 | } | ||
| 347 | current_map_addr += 0x100000; | ||
| 348 | } | ||
| 349 | |||
| 350 | if (!succeeded) { | ||
| 351 | UNREACHABLE_MSG("Out of address space!"); | ||
| 352 | return Kernel::ResultOutOfMemory; | ||
| 353 | } | ||
| 354 | |||
| 355 | out_addr = current_map_addr; | ||
| 356 | current_map_addr += size; | ||
| 357 | |||
| 358 | return ResultSuccess; | ||
| 322 | } | 359 | } |
| 323 | 360 | ||
| 324 | ResultVal<VAddr> MapProcessCodeMemory(Kernel::KProcess* process, VAddr baseAddress, | 361 | ResultVal<VAddr> MapProcessCodeMemory(Kernel::KProcess* process, VAddr base_addr, u64 size) { |
| 325 | u64 size) const { | 362 | auto& page_table{process->PageTable()}; |
| 363 | VAddr addr{}; | ||
| 364 | |||
| 326 | for (std::size_t retry = 0; retry < MAXIMUM_MAP_RETRIES; retry++) { | 365 | for (std::size_t retry = 0; retry < MAXIMUM_MAP_RETRIES; retry++) { |
| 327 | auto& page_table{process->PageTable()}; | 366 | R_TRY(GetAvailableMapRegion(page_table, size, addr)); |
| 328 | const VAddr addr{GetRandomMapRegion(page_table, size)}; | ||
| 329 | const ResultCode result{page_table.MapCodeMemory(addr, baseAddress, size)}; | ||
| 330 | 367 | ||
| 368 | const ResultCode result{page_table.MapCodeMemory(addr, base_addr, size)}; | ||
| 331 | if (result == Kernel::ResultInvalidCurrentMemory) { | 369 | if (result == Kernel::ResultInvalidCurrentMemory) { |
| 332 | continue; | 370 | continue; |
| 333 | } | 371 | } |
| 334 | 372 | ||
| 335 | CASCADE_CODE(result); | 373 | R_TRY(result); |
| 336 | 374 | ||
| 337 | if (ValidateRegionForMap(page_table, addr, size)) { | 375 | if (ValidateRegionForMap(page_table, addr, size)) { |
| 338 | return addr; | 376 | return addr; |
| @@ -343,7 +381,7 @@ public: | |||
| 343 | } | 381 | } |
| 344 | 382 | ||
| 345 | ResultVal<VAddr> MapNro(Kernel::KProcess* process, VAddr nro_addr, std::size_t nro_size, | 383 | ResultVal<VAddr> MapNro(Kernel::KProcess* process, VAddr nro_addr, std::size_t nro_size, |
| 346 | VAddr bss_addr, std::size_t bss_size, std::size_t size) const { | 384 | VAddr bss_addr, std::size_t bss_size, std::size_t size) { |
| 347 | for (std::size_t retry = 0; retry < MAXIMUM_MAP_RETRIES; retry++) { | 385 | for (std::size_t retry = 0; retry < MAXIMUM_MAP_RETRIES; retry++) { |
| 348 | auto& page_table{process->PageTable()}; | 386 | auto& page_table{process->PageTable()}; |
| 349 | VAddr addr{}; | 387 | VAddr addr{}; |
| @@ -597,6 +635,7 @@ public: | |||
| 597 | LOG_WARNING(Service_LDR, "(STUBBED) called"); | 635 | LOG_WARNING(Service_LDR, "(STUBBED) called"); |
| 598 | 636 | ||
| 599 | initialized = true; | 637 | initialized = true; |
| 638 | current_map_addr = system.CurrentProcess()->PageTable().GetAliasCodeRegionStart(); | ||
| 600 | 639 | ||
| 601 | IPC::ResponseBuilder rb{ctx, 2}; | 640 | IPC::ResponseBuilder rb{ctx, 2}; |
| 602 | rb.Push(ResultSuccess); | 641 | rb.Push(ResultSuccess); |
| @@ -607,6 +646,7 @@ private: | |||
| 607 | 646 | ||
| 608 | std::map<VAddr, NROInfo> nro; | 647 | std::map<VAddr, NROInfo> nro; |
| 609 | std::map<VAddr, std::vector<SHA256Hash>> nrr; | 648 | std::map<VAddr, std::vector<SHA256Hash>> nrr; |
| 649 | VAddr current_map_addr{}; | ||
| 610 | 650 | ||
| 611 | bool IsValidNROHash(const SHA256Hash& hash) const { | 651 | bool IsValidNROHash(const SHA256Hash& hash) const { |
| 612 | return std::any_of(nrr.begin(), nrr.end(), [&hash](const auto& p) { | 652 | return std::any_of(nrr.begin(), nrr.end(), [&hash](const auto& p) { |
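Where the old GetRandomMapRegion retried random addresses, GetAvailableMapRegion walks the alias-code region linearly from a persistent cursor (current_map_addr), stepping 1 MiB at a time and reserving the first window whose pages are all unmapped and outside the heap and alias regions. A simplified, self-contained sketch of that first-fit scan; the availability check is a toy stand-in for the page-table and memory queries:

    #include <cstdint>
    #include <optional>
    #include <set>

    constexpr std::uint64_t PageSize = 0x1000;
    constexpr std::uint64_t ScanStep = 0x100000; // 1 MiB, matching the diff

    // Toy model of the address space: the set holds pages that are already mapped.
    std::set<std::uint64_t> mapped_pages;

    bool IsRegionAvailable(std::uint64_t addr, std::uint64_t size) {
        for (std::uint64_t page = addr; page < addr + size; page += PageSize) {
            if (mapped_pages.count(page) != 0) {
                return false; // some page in the window is already in use
            }
        }
        return true;
    }

    // First-fit scan over [cursor, region_end); the cursor persists across calls,
    // like the service's current_map_addr member.
    std::optional<std::uint64_t> GetAvailableMapRegion(std::uint64_t& cursor,
                                                       std::uint64_t region_end,
                                                       std::uint64_t size) {
        while (cursor < region_end) {
            if (IsRegionAvailable(cursor, size)) {
                const std::uint64_t found = cursor;
                cursor += size; // later requests start scanning past this reservation
                return found;
            }
            cursor += ScanStep;
        }
        return std::nullopt; // out of alias-code address space
    }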
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp index e0fe47912..f3c7ceb57 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp | |||
| @@ -13,59 +13,535 @@ namespace { | |||
| 13 | // Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table) | 13 | // Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table) |
| 14 | IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c, | 14 | IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c, |
| 15 | u64 ttbl) { | 15 | u64 ttbl) { |
| 16 | IR::U32 r{ir.Imm32(0)}; | 16 | switch (ttbl) { |
| 17 | const IR::U32 not_a{ir.BitwiseNot(a)}; | 17 | // generated code, do not edit manually |
| 18 | const IR::U32 not_b{ir.BitwiseNot(b)}; | 18 | case 0: |
| 19 | const IR::U32 not_c{ir.BitwiseNot(c)}; | 19 | return ir.Imm32(0); |
| 20 | if (ttbl & 0x01) { | 20 | case 1: |
| 21 | // r |= ~a & ~b & ~c; | 21 | return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseOr(b, c))); |
| 22 | const auto lhs{ir.BitwiseAnd(not_a, not_b)}; | 22 | case 2: |
| 23 | const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | 23 | return ir.BitwiseAnd(c, ir.BitwiseNot(ir.BitwiseOr(a, b))); |
| 24 | r = ir.BitwiseOr(r, rhs); | 24 | case 3: |
| 25 | return ir.BitwiseNot(ir.BitwiseOr(a, b)); | ||
| 26 | case 4: | ||
| 27 | return ir.BitwiseAnd(b, ir.BitwiseNot(ir.BitwiseOr(a, c))); | ||
| 28 | case 5: | ||
| 29 | return ir.BitwiseNot(ir.BitwiseOr(a, c)); | ||
| 30 | case 6: | ||
| 31 | return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseXor(b, c)); | ||
| 32 | case 7: | ||
| 33 | return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseAnd(b, c))); | ||
| 34 | case 8: | ||
| 35 | return ir.BitwiseAnd(ir.BitwiseAnd(b, c), ir.BitwiseNot(a)); | ||
| 36 | case 9: | ||
| 37 | return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseXor(b, c))); | ||
| 38 | case 10: | ||
| 39 | return ir.BitwiseAnd(c, ir.BitwiseNot(a)); | ||
| 40 | case 11: | ||
| 41 | return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(c, ir.BitwiseNot(b))); | ||
| 42 | case 12: | ||
| 43 | return ir.BitwiseAnd(b, ir.BitwiseNot(a)); | ||
| 44 | case 13: | ||
| 45 | return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(b, ir.BitwiseNot(c))); | ||
| 46 | case 14: | ||
| 47 | return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(b, c)); | ||
| 48 | case 15: | ||
| 49 | return ir.BitwiseNot(a); | ||
| 50 | case 16: | ||
| 51 | return ir.BitwiseAnd(a, ir.BitwiseNot(ir.BitwiseOr(b, c))); | ||
| 52 | case 17: | ||
| 53 | return ir.BitwiseNot(ir.BitwiseOr(b, c)); | ||
| 54 | case 18: | ||
| 55 | return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseXor(a, c)); | ||
| 56 | case 19: | ||
| 57 | return ir.BitwiseNot(ir.BitwiseOr(b, ir.BitwiseAnd(a, c))); | ||
| 58 | case 20: | ||
| 59 | return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseXor(a, b)); | ||
| 60 | case 21: | ||
| 61 | return ir.BitwiseNot(ir.BitwiseOr(c, ir.BitwiseAnd(a, b))); | ||
| 62 | case 22: | ||
| 63 | return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseAnd(a, b))); | ||
| 64 | case 23: | ||
| 65 | return ir.BitwiseXor(ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)), | ||
| 66 | ir.BitwiseNot(a)); | ||
| 67 | case 24: | ||
| 68 | return ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)); | ||
| 69 | case 25: | ||
| 70 | return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, c))); | ||
| 71 | case 26: | ||
| 72 | return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(b)), ir.BitwiseXor(a, c)); | ||
| 73 | case 27: | ||
| 74 | return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseOr(b, c)); | ||
| 75 | case 28: | ||
| 76 | return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(c)), ir.BitwiseXor(a, b)); | ||
| 77 | case 29: | ||
| 78 | return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseOr(b, c)); | ||
| 79 | case 30: | ||
| 80 | return ir.BitwiseXor(a, ir.BitwiseOr(b, c)); | ||
| 81 | case 31: | ||
| 82 | return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseOr(b, c))); | ||
| 83 | case 32: | ||
| 84 | return ir.BitwiseAnd(ir.BitwiseAnd(a, c), ir.BitwiseNot(b)); | ||
| 85 | case 33: | ||
| 86 | return ir.BitwiseNot(ir.BitwiseOr(b, ir.BitwiseXor(a, c))); | ||
| 87 | case 34: | ||
| 88 | return ir.BitwiseAnd(c, ir.BitwiseNot(b)); | ||
| 89 | case 35: | ||
| 90 | return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(c, ir.BitwiseNot(a))); | ||
| 91 | case 36: | ||
| 92 | return ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(b, c)); | ||
| 93 | case 37: | ||
| 94 | return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, c))); | ||
| 95 | case 38: | ||
| 96 | return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(a)), ir.BitwiseXor(b, c)); | ||
| 97 | case 39: | ||
| 98 | return ir.BitwiseXor(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(c))); | ||
| 99 | case 40: | ||
| 100 | return ir.BitwiseAnd(c, ir.BitwiseXor(a, b)); | ||
| 101 | case 41: | ||
| 102 | return ir.BitwiseXor(ir.BitwiseOr(a, b), | ||
| 103 | ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(c))); | ||
| 104 | case 42: | ||
| 105 | return ir.BitwiseAnd(c, ir.BitwiseNot(ir.BitwiseAnd(a, b))); | ||
| 106 | case 43: | ||
| 107 | return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(c)), | ||
| 108 | ir.BitwiseOr(b, ir.BitwiseXor(a, c))); | ||
| 109 | case 44: | ||
| 110 | return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, b)); | ||
| 111 | case 45: | ||
| 112 | return ir.BitwiseXor(a, ir.BitwiseOr(b, ir.BitwiseNot(c))); | ||
| 113 | case 46: | ||
| 114 | return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseOr(b, c)); | ||
| 115 | case 47: | ||
| 116 | return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(b)), ir.BitwiseNot(a)); | ||
| 117 | case 48: | ||
| 118 | return ir.BitwiseAnd(a, ir.BitwiseNot(b)); | ||
| 119 | case 49: | ||
| 120 | return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(a, ir.BitwiseNot(c))); | ||
| 121 | case 50: | ||
| 122 | return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(a, c)); | ||
| 123 | case 51: | ||
| 124 | return ir.BitwiseNot(b); | ||
| 125 | case 52: | ||
| 126 | return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, b)); | ||
| 127 | case 53: | ||
| 128 | return ir.BitwiseXor(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(a))); | ||
| 129 | case 54: | ||
| 130 | return ir.BitwiseXor(b, ir.BitwiseOr(a, c)); | ||
| 131 | case 55: | ||
| 132 | return ir.BitwiseNot(ir.BitwiseAnd(b, ir.BitwiseOr(a, c))); | ||
| 133 | case 56: | ||
| 134 | return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, b)); | ||
| 135 | case 57: | ||
| 136 | return ir.BitwiseXor(b, ir.BitwiseOr(a, ir.BitwiseNot(c))); | ||
| 137 | case 58: | ||
| 138 | return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseOr(a, c)); | ||
| 139 | case 59: | ||
| 140 | return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(a)), ir.BitwiseNot(b)); | ||
| 141 | case 60: | ||
| 142 | return ir.BitwiseXor(a, b); | ||
| 143 | case 61: | ||
| 144 | return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, c)), ir.BitwiseXor(a, b)); | ||
| 145 | case 62: | ||
| 146 | return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(a)), ir.BitwiseXor(a, b)); | ||
| 147 | case 63: | ||
| 148 | return ir.BitwiseNot(ir.BitwiseAnd(a, b)); | ||
| 149 | case 64: | ||
| 150 | return ir.BitwiseAnd(ir.BitwiseAnd(a, b), ir.BitwiseNot(c)); | ||
| 151 | case 65: | ||
| 152 | return ir.BitwiseNot(ir.BitwiseOr(c, ir.BitwiseXor(a, b))); | ||
| 153 | case 66: | ||
| 154 | return ir.BitwiseAnd(ir.BitwiseXor(a, c), ir.BitwiseXor(b, c)); | ||
| 155 | case 67: | ||
| 156 | return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, b))); | ||
| 157 | case 68: | ||
| 158 | return ir.BitwiseAnd(b, ir.BitwiseNot(c)); | ||
| 159 | case 69: | ||
| 160 | return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(b, ir.BitwiseNot(a))); | ||
| 161 | case 70: | ||
| 162 | return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, c)); | ||
| 163 | case 71: | ||
| 164 | return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(b))); | ||
| 165 | case 72: | ||
| 166 | return ir.BitwiseAnd(b, ir.BitwiseXor(a, c)); | ||
| 167 | case 73: | ||
| 168 | return ir.BitwiseXor(ir.BitwiseOr(a, c), | ||
| 169 | ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(b))); | ||
| 170 | case 74: | ||
| 171 | return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, c)); | ||
| 172 | case 75: | ||
| 173 | return ir.BitwiseXor(a, ir.BitwiseOr(c, ir.BitwiseNot(b))); | ||
| 174 | case 76: | ||
| 175 | return ir.BitwiseAnd(b, ir.BitwiseNot(ir.BitwiseAnd(a, c))); | ||
| 176 | case 77: | ||
| 177 | return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(b)), | ||
| 178 | ir.BitwiseOr(c, ir.BitwiseXor(a, b))); | ||
| 179 | case 78: | ||
| 180 | return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseOr(b, c)); | ||
| 181 | case 79: | ||
| 182 | return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(c)), ir.BitwiseNot(a)); | ||
| 183 | case 80: | ||
| 184 | return ir.BitwiseAnd(a, ir.BitwiseNot(c)); | ||
| 185 | case 81: | ||
| 186 | return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(a, ir.BitwiseNot(b))); | ||
| 187 | case 82: | ||
| 188 | return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, c)); | ||
| 189 | case 83: | ||
| 190 | return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(a))); | ||
| 191 | case 84: | ||
| 192 | return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(a, b)); | ||
| 193 | case 85: | ||
| 194 | return ir.BitwiseNot(c); | ||
| 195 | case 86: | ||
| 196 | return ir.BitwiseXor(c, ir.BitwiseOr(a, b)); | ||
| 197 | case 87: | ||
| 198 | return ir.BitwiseNot(ir.BitwiseAnd(c, ir.BitwiseOr(a, b))); | ||
| 199 | case 88: | ||
| 200 | return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, c)); | ||
| 201 | case 89: | ||
| 202 | return ir.BitwiseXor(c, ir.BitwiseOr(a, ir.BitwiseNot(b))); | ||
| 203 | case 90: | ||
| 204 | return ir.BitwiseXor(a, c); | ||
| 205 | case 91: | ||
| 206 | return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)), ir.BitwiseXor(a, c)); | ||
| 207 | case 92: | ||
| 208 | return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseOr(a, b)); | ||
| 209 | case 93: | ||
| 210 | return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseNot(c)); | ||
| 211 | case 94: | ||
| 212 | return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseXor(a, c)); | ||
| 213 | case 95: | ||
| 214 | return ir.BitwiseNot(ir.BitwiseAnd(a, c)); | ||
| 215 | case 96: | ||
| 216 | return ir.BitwiseAnd(a, ir.BitwiseXor(b, c)); | ||
| 217 | case 97: | ||
| 218 | return ir.BitwiseXor(ir.BitwiseOr(b, c), | ||
| 219 | ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(a))); | ||
| 220 | case 98: | ||
| 221 | return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(b, c)); | ||
| 222 | case 99: | ||
| 223 | return ir.BitwiseXor(b, ir.BitwiseOr(c, ir.BitwiseNot(a))); | ||
| 224 | case 100: | ||
| 225 | return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, c)); | ||
| 226 | case 101: | ||
| 227 | return ir.BitwiseXor(c, ir.BitwiseOr(b, ir.BitwiseNot(a))); | ||
| 228 | case 102: | ||
| 229 | return ir.BitwiseXor(b, c); | ||
| 230 | case 103: | ||
| 231 | return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)), ir.BitwiseXor(b, c)); | ||
| 232 | case 104: | ||
| 233 | return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(c, ir.BitwiseAnd(a, b))); | ||
| 234 | case 105: | ||
| 235 | return ir.BitwiseXor(ir.BitwiseNot(a), ir.BitwiseXor(b, c)); | ||
| 236 | case 106: | ||
| 237 | return ir.BitwiseXor(c, ir.BitwiseAnd(a, b)); | ||
| 238 | case 107: | ||
| 239 | return ir.BitwiseXor(ir.BitwiseAnd(c, ir.BitwiseOr(a, b)), | ||
| 240 | ir.BitwiseXor(a, ir.BitwiseNot(b))); | ||
| 241 | case 108: | ||
| 242 | return ir.BitwiseXor(b, ir.BitwiseAnd(a, c)); | ||
| 243 | case 109: | ||
| 244 | return ir.BitwiseXor(ir.BitwiseAnd(b, ir.BitwiseOr(a, c)), | ||
| 245 | ir.BitwiseXor(a, ir.BitwiseNot(c))); | ||
| 246 | case 110: | ||
| 247 | return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, c)); | ||
| 248 | case 111: | ||
| 249 | return ir.BitwiseOr(ir.BitwiseNot(a), ir.BitwiseXor(b, c)); | ||
| 250 | case 112: | ||
| 251 | return ir.BitwiseAnd(a, ir.BitwiseNot(ir.BitwiseAnd(b, c))); | ||
| 252 | case 113: | ||
| 253 | return ir.BitwiseXor(ir.BitwiseOr(b, ir.BitwiseNot(a)), | ||
| 254 | ir.BitwiseOr(c, ir.BitwiseXor(a, b))); | ||
| 255 | case 114: | ||
| 256 | return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseOr(a, c)); | ||
| 257 | case 115: | ||
| 258 | return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(c)), ir.BitwiseNot(b)); | ||
| 259 | case 116: | ||
| 260 | return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseOr(a, b)); | ||
| 261 | case 117: | ||
| 262 | return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseNot(c)); | ||
| 263 | case 118: | ||
| 264 | return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, c)); | ||
| 265 | case 119: | ||
| 266 | return ir.BitwiseNot(ir.BitwiseAnd(b, c)); | ||
| 267 | case 120: | ||
| 268 | return ir.BitwiseXor(a, ir.BitwiseAnd(b, c)); | ||
| 269 | case 121: | ||
| 270 | return ir.BitwiseXor(ir.BitwiseAnd(a, ir.BitwiseOr(b, c)), | ||
| 271 | ir.BitwiseXor(b, ir.BitwiseNot(c))); | ||
| 272 | case 122: | ||
| 273 | return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, c)); | ||
| 274 | case 123: | ||
| 275 | return ir.BitwiseOr(ir.BitwiseNot(b), ir.BitwiseXor(a, c)); | ||
| 276 | case 124: | ||
| 277 | return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, b)); | ||
| 278 | case 125: | ||
| 279 | return ir.BitwiseOr(ir.BitwiseNot(c), ir.BitwiseXor(a, b)); | ||
| 280 | case 126: | ||
| 281 | return ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)); | ||
| 282 | case 127: | ||
| 283 | return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseAnd(b, c))); | ||
| 284 | case 128: | ||
| 285 | return ir.BitwiseAnd(a, ir.BitwiseAnd(b, c)); | ||
| 286 | case 129: | ||
| 287 | return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c))); | ||
| 288 | case 130: | ||
| 289 | return ir.BitwiseAnd(c, ir.BitwiseXor(a, ir.BitwiseNot(b))); | ||
| 290 | case 131: | ||
| 291 | return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(a)), ir.BitwiseXor(a, ir.BitwiseNot(b))); | ||
| 292 | case 132: | ||
| 293 | return ir.BitwiseAnd(b, ir.BitwiseXor(a, ir.BitwiseNot(c))); | ||
| 294 | case 133: | ||
| 295 | return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(a, ir.BitwiseNot(c))); | ||
| 296 | case 134: | ||
| 297 | return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, ir.BitwiseXor(b, c))); | ||
| 298 | case 135: | ||
| 299 | return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseNot(a)); | ||
| 300 | case 136: | ||
| 301 | return ir.BitwiseAnd(b, c); | ||
| 302 | case 137: | ||
| 303 | return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, ir.BitwiseNot(c))); | ||
| 304 | case 138: | ||
| 305 | return ir.BitwiseAnd(c, ir.BitwiseOr(b, ir.BitwiseNot(a))); | ||
| 306 | case 139: | ||
| 307 | return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(ir.BitwiseOr(a, b))); | ||
| 308 | case 140: | ||
| 309 | return ir.BitwiseAnd(b, ir.BitwiseOr(c, ir.BitwiseNot(a))); | ||
| 310 | case 141: | ||
| 311 | return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(ir.BitwiseOr(a, c))); | ||
| 312 | case 142: | ||
| 313 | return ir.BitwiseXor(a, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c))); | ||
| 314 | case 143: | ||
| 315 | return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(a)); | ||
| 316 | case 144: | ||
| 317 | return ir.BitwiseAnd(a, ir.BitwiseXor(b, ir.BitwiseNot(c))); | ||
| 318 | case 145: | ||
| 319 | return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, ir.BitwiseNot(c))); | ||
| 320 | case 146: | ||
| 321 | return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, ir.BitwiseXor(b, c))); | ||
| 322 | case 147: | ||
| 323 | return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseNot(b)); | ||
| 324 | case 148: | ||
| 325 | return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, ir.BitwiseXor(b, c))); | ||
| 326 | case 149: | ||
| 327 | return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseNot(c)); | ||
| 328 | case 150: | ||
| 329 | return ir.BitwiseXor(a, ir.BitwiseXor(b, c)); | ||
| 330 | case 151: | ||
| 331 | return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)), | ||
| 332 | ir.BitwiseXor(a, ir.BitwiseXor(b, c))); | ||
| 333 | case 152: | ||
| 334 | return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, ir.BitwiseNot(c))); | ||
| 335 | case 153: | ||
| 336 | return ir.BitwiseXor(b, ir.BitwiseNot(c)); | ||
| 337 | case 154: | ||
| 338 | return ir.BitwiseXor(c, ir.BitwiseAnd(a, ir.BitwiseNot(b))); | ||
| 339 | case 155: | ||
| 340 | return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, c))); | ||
| 341 | case 156: | ||
| 342 | return ir.BitwiseXor(b, ir.BitwiseAnd(a, ir.BitwiseNot(c))); | ||
| 343 | case 157: | ||
| 344 | return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(b, c))); | ||
| 345 | case 158: | ||
| 346 | return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseOr(b, c))); | ||
| 347 | case 159: | ||
| 348 | return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseXor(b, c))); | ||
| 349 | case 160: | ||
| 350 | return ir.BitwiseAnd(a, c); | ||
| 351 | case 161: | ||
| 352 | return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, ir.BitwiseNot(c))); | ||
| 353 | case 162: | ||
| 354 | return ir.BitwiseAnd(c, ir.BitwiseOr(a, ir.BitwiseNot(b))); | ||
| 355 | case 163: | ||
| 356 | return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(ir.BitwiseOr(a, b))); | ||
| 357 | case 164: | ||
| 358 | return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c))); | ||
| 359 | case 165: | ||
| 360 | return ir.BitwiseXor(a, ir.BitwiseNot(c)); | ||
| 361 | case 166: | ||
| 362 | return ir.BitwiseXor(c, ir.BitwiseAnd(b, ir.BitwiseNot(a))); | ||
| 363 | case 167: | ||
| 364 | return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, c))); | ||
| 365 | case 168: | ||
| 366 | return ir.BitwiseAnd(c, ir.BitwiseOr(a, b)); | ||
| 367 | case 169: | ||
| 368 | return ir.BitwiseXor(ir.BitwiseNot(c), ir.BitwiseOr(a, b)); | ||
| 369 | case 170: | ||
| 370 | return c; | ||
| 371 | case 171: | ||
| 372 | return ir.BitwiseOr(c, ir.BitwiseNot(ir.BitwiseOr(a, b))); | ||
| 373 | case 172: | ||
| 374 | return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(a))); | ||
| 375 | case 173: | ||
| 376 | return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseNot(c))); | ||
| 377 | case 174: | ||
| 378 | return ir.BitwiseOr(c, ir.BitwiseAnd(b, ir.BitwiseNot(a))); | ||
| 379 | case 175: | ||
| 380 | return ir.BitwiseOr(c, ir.BitwiseNot(a)); | ||
| 381 | case 176: | ||
| 382 | return ir.BitwiseAnd(a, ir.BitwiseOr(c, ir.BitwiseNot(b))); | ||
| 383 | case 177: | ||
| 384 | return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(ir.BitwiseOr(b, c))); | ||
| 385 | case 178: | ||
| 386 | return ir.BitwiseXor(b, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c))); | ||
| 387 | case 179: | ||
| 388 | return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(b)); | ||
| 389 | case 180: | ||
| 390 | return ir.BitwiseXor(a, ir.BitwiseAnd(b, ir.BitwiseNot(c))); | ||
| 391 | case 181: | ||
| 392 | return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, c))); | ||
| 393 | case 182: | ||
| 394 | return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(b, ir.BitwiseOr(a, c))); | ||
| 395 | case 183: | ||
| 396 | return ir.BitwiseNot(ir.BitwiseAnd(b, ir.BitwiseXor(a, c))); | ||
| 397 | case 184: | ||
| 398 | return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(b))); | ||
| 399 | case 185: | ||
| 400 | return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(b, ir.BitwiseNot(c))); | ||
| 401 | case 186: | ||
| 402 | return ir.BitwiseOr(c, ir.BitwiseAnd(a, ir.BitwiseNot(b))); | ||
| 403 | case 187: | ||
| 404 | return ir.BitwiseOr(c, ir.BitwiseNot(b)); | ||
| 405 | case 188: | ||
| 406 | return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, b)); | ||
| 407 | case 189: | ||
| 408 | return ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c))); | ||
| 409 | case 190: | ||
| 410 | return ir.BitwiseOr(c, ir.BitwiseXor(a, b)); | ||
| 411 | case 191: | ||
| 412 | return ir.BitwiseOr(c, ir.BitwiseNot(ir.BitwiseAnd(a, b))); | ||
| 413 | case 192: | ||
| 414 | return ir.BitwiseAnd(a, b); | ||
| 415 | case 193: | ||
| 416 | return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, ir.BitwiseNot(b))); | ||
| 417 | case 194: | ||
| 418 | return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b))); | ||
| 419 | case 195: | ||
| 420 | return ir.BitwiseXor(a, ir.BitwiseNot(b)); | ||
| 421 | case 196: | ||
| 422 | return ir.BitwiseAnd(b, ir.BitwiseOr(a, ir.BitwiseNot(c))); | ||
| 423 | case 197: | ||
| 424 | return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(ir.BitwiseOr(a, c))); | ||
| 425 | case 198: | ||
| 426 | return ir.BitwiseXor(b, ir.BitwiseAnd(c, ir.BitwiseNot(a))); | ||
| 427 | case 199: | ||
| 428 | return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, b))); | ||
| 429 | case 200: | ||
| 430 | return ir.BitwiseAnd(b, ir.BitwiseOr(a, c)); | ||
| 431 | case 201: | ||
| 432 | return ir.BitwiseXor(ir.BitwiseNot(b), ir.BitwiseOr(a, c)); | ||
| 433 | case 202: | ||
| 434 | return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(a))); | ||
| 435 | case 203: | ||
| 436 | return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseNot(b))); | ||
| 437 | case 204: | ||
| 438 | return b; | ||
| 439 | case 205: | ||
| 440 | return ir.BitwiseOr(b, ir.BitwiseNot(ir.BitwiseOr(a, c))); | ||
| 441 | case 206: | ||
| 442 | return ir.BitwiseOr(b, ir.BitwiseAnd(c, ir.BitwiseNot(a))); | ||
| 443 | case 207: | ||
| 444 | return ir.BitwiseOr(b, ir.BitwiseNot(a)); | ||
| 445 | case 208: | ||
| 446 | return ir.BitwiseAnd(a, ir.BitwiseOr(b, ir.BitwiseNot(c))); | ||
| 447 | case 209: | ||
| 448 | return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(ir.BitwiseOr(b, c))); | ||
| 449 | case 210: | ||
| 450 | return ir.BitwiseXor(a, ir.BitwiseAnd(c, ir.BitwiseNot(b))); | ||
| 451 | case 211: | ||
| 452 | return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, b))); | ||
| 453 | case 212: | ||
| 454 | return ir.BitwiseXor(c, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c))); | ||
| 455 | case 213: | ||
| 456 | return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(c)); | ||
| 457 | case 214: | ||
| 458 | return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(c, ir.BitwiseOr(a, b))); | ||
| 459 | case 215: | ||
| 460 | return ir.BitwiseNot(ir.BitwiseAnd(c, ir.BitwiseXor(a, b))); | ||
| 461 | case 216: | ||
| 462 | return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(c))); | ||
| 463 | case 217: | ||
| 464 | return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, ir.BitwiseNot(c))); | ||
| 465 | case 218: | ||
| 466 | return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, c)); | ||
| 467 | case 219: | ||
| 468 | return ir.BitwiseOr(ir.BitwiseXor(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b))); | ||
| 469 | case 220: | ||
| 470 | return ir.BitwiseOr(b, ir.BitwiseAnd(a, ir.BitwiseNot(c))); | ||
| 471 | case 221: | ||
| 472 | return ir.BitwiseOr(b, ir.BitwiseNot(c)); | ||
| 473 | case 222: | ||
| 474 | return ir.BitwiseOr(b, ir.BitwiseXor(a, c)); | ||
| 475 | case 223: | ||
| 476 | return ir.BitwiseOr(b, ir.BitwiseNot(ir.BitwiseAnd(a, c))); | ||
| 477 | case 224: | ||
| 478 | return ir.BitwiseAnd(a, ir.BitwiseOr(b, c)); | ||
| 479 | case 225: | ||
| 480 | return ir.BitwiseXor(ir.BitwiseNot(a), ir.BitwiseOr(b, c)); | ||
| 481 | case 226: | ||
| 482 | return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseOr(b, c)); | ||
| 483 | case 227: | ||
| 484 | return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b))); | ||
| 485 | case 228: | ||
| 486 | return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseOr(b, c)); | ||
| 487 | case 229: | ||
| 488 | return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c))); | ||
| 489 | case 230: | ||
| 490 | return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, c)); | ||
| 491 | case 231: | ||
| 492 | return ir.BitwiseOr(ir.BitwiseXor(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, c)); | ||
| 493 | case 232: | ||
| 494 | return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseAnd(a, b))); | ||
| 495 | case 233: | ||
| 496 | return ir.BitwiseOr(ir.BitwiseAnd(a, b), | ||
| 497 | ir.BitwiseXor(ir.BitwiseNot(c), ir.BitwiseOr(a, b))); | ||
| 498 | case 234: | ||
| 499 | return ir.BitwiseOr(c, ir.BitwiseAnd(a, b)); | ||
| 500 | case 235: | ||
| 501 | return ir.BitwiseOr(c, ir.BitwiseXor(a, ir.BitwiseNot(b))); | ||
| 502 | case 236: | ||
| 503 | return ir.BitwiseOr(b, ir.BitwiseAnd(a, c)); | ||
| 504 | case 237: | ||
| 505 | return ir.BitwiseOr(b, ir.BitwiseXor(a, ir.BitwiseNot(c))); | ||
| 506 | case 238: | ||
| 507 | return ir.BitwiseOr(b, c); | ||
| 508 | case 239: | ||
| 509 | return ir.BitwiseOr(ir.BitwiseNot(a), ir.BitwiseOr(b, c)); | ||
| 510 | case 240: | ||
| 511 | return a; | ||
| 512 | case 241: | ||
| 513 | return ir.BitwiseOr(a, ir.BitwiseNot(ir.BitwiseOr(b, c))); | ||
| 514 | case 242: | ||
| 515 | return ir.BitwiseOr(a, ir.BitwiseAnd(c, ir.BitwiseNot(b))); | ||
| 516 | case 243: | ||
| 517 | return ir.BitwiseOr(a, ir.BitwiseNot(b)); | ||
| 518 | case 244: | ||
| 519 | return ir.BitwiseOr(a, ir.BitwiseAnd(b, ir.BitwiseNot(c))); | ||
| 520 | case 245: | ||
| 521 | return ir.BitwiseOr(a, ir.BitwiseNot(c)); | ||
| 522 | case 246: | ||
| 523 | return ir.BitwiseOr(a, ir.BitwiseXor(b, c)); | ||
| 524 | case 247: | ||
| 525 | return ir.BitwiseOr(a, ir.BitwiseNot(ir.BitwiseAnd(b, c))); | ||
| 526 | case 248: | ||
| 527 | return ir.BitwiseOr(a, ir.BitwiseAnd(b, c)); | ||
| 528 | case 249: | ||
| 529 | return ir.BitwiseOr(a, ir.BitwiseXor(b, ir.BitwiseNot(c))); | ||
| 530 | case 250: | ||
| 531 | return ir.BitwiseOr(a, c); | ||
| 532 | case 251: | ||
| 533 | return ir.BitwiseOr(ir.BitwiseNot(b), ir.BitwiseOr(a, c)); | ||
| 534 | case 252: | ||
| 535 | return ir.BitwiseOr(a, b); | ||
| 536 | case 253: | ||
| 537 | return ir.BitwiseOr(ir.BitwiseNot(c), ir.BitwiseOr(a, b)); | ||
| 538 | case 254: | ||
| 539 | return ir.BitwiseOr(a, ir.BitwiseOr(b, c)); | ||
| 540 | case 255: | ||
| 541 | return ir.Imm32(0xFFFFFFFF); | ||
| 542 | // end of generated code | ||
| 25 | } | 543 | } |
| 26 | if (ttbl & 0x02) { | 544 | throw NotImplementedException("LOP3 with out of range ttbl"); |
| 27 | // r |= ~a & ~b & c; | ||
| 28 | const auto lhs{ir.BitwiseAnd(not_a, not_b)}; | ||
| 29 | const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||
| 30 | r = ir.BitwiseOr(r, rhs); | ||
| 31 | } | ||
| 32 | if (ttbl & 0x04) { | ||
| 33 | // r |= ~a & b & ~c; | ||
| 34 | const auto lhs{ir.BitwiseAnd(not_a, b)}; | ||
| 35 | const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||
| 36 | r = ir.BitwiseOr(r, rhs); | ||
| 37 | } | ||
| 38 | if (ttbl & 0x08) { | ||
| 39 | // r |= ~a & b & c; | ||
| 40 | const auto lhs{ir.BitwiseAnd(not_a, b)}; | ||
| 41 | const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||
| 42 | r = ir.BitwiseOr(r, rhs); | ||
| 43 | } | ||
| 44 | if (ttbl & 0x10) { | ||
| 45 | // r |= a & ~b & ~c; | ||
| 46 | const auto lhs{ir.BitwiseAnd(a, not_b)}; | ||
| 47 | const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||
| 48 | r = ir.BitwiseOr(r, rhs); | ||
| 49 | } | ||
| 50 | if (ttbl & 0x20) { | ||
| 51 | // r |= a & ~b & c; | ||
| 52 | const auto lhs{ir.BitwiseAnd(a, not_b)}; | ||
| 53 | const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||
| 54 | r = ir.BitwiseOr(r, rhs); | ||
| 55 | } | ||
| 56 | if (ttbl & 0x40) { | ||
| 57 | // r |= a & b & ~c; | ||
| 58 | const auto lhs{ir.BitwiseAnd(a, b)}; | ||
| 59 | const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||
| 60 | r = ir.BitwiseOr(r, rhs); | ||
| 61 | } | ||
| 62 | if (ttbl & 0x80) { | ||
| 63 | // r |= a & b & c; | ||
| 64 | const auto lhs{ir.BitwiseAnd(a, b)}; | ||
| 65 | const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||
| 66 | r = ir.BitwiseOr(r, rhs); | ||
| 67 | } | ||
| 68 | return r; | ||
| 69 | } | 545 | } |
| 70 | 546 | ||
| 71 | IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) { | 547 | IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) { |
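Both the removed per-minterm expansion above and the newly generated switch implement the same LOP3 semantics: each result bit selects one bit of the 8-bit truth-table immediate, indexed by the corresponding bits of a, b and c. The following scalar reference is illustrative only (the function name and file are invented, not emulator code), but it states the rule the generated cases must satisfy:

    #include <cstdint>

    // Illustrative LOP3 reference: bit i of the result is bit
    // ((a_i << 2) | (b_i << 1) | c_i) of the truth-table immediate ttbl.
    std::uint32_t Lop3Reference(std::uint32_t a, std::uint32_t b, std::uint32_t c,
                                std::uint8_t ttbl) {
        std::uint32_t r = 0;
        for (int i = 0; i < 32; ++i) {
            const unsigned index =
                (((a >> i) & 1u) << 2) | (((b >> i) & 1u) << 1) | ((c >> i) & 1u);
            r |= ((ttbl >> index) & 1u) << static_cast<unsigned>(i);
        }
        return r;
    }

For example, ttbl = 168 (0xA8) has bits 3, 5 and 7 set, i.e. the minterms ~a&b&c, a&~b&c and a&b&c, which reduce to the generated "case 168" expression c & (a | b).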
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py new file mode 100644 index 000000000..8f547c266 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py | |||
| @@ -0,0 +1,92 @@ | |||
| 1 | # Copyright © 2022 degasus <markus@selfnet.de> | ||
| 2 | # This work is free. You can redistribute it and/or modify it under the | ||
| 3 | # terms of the Do What The Fuck You Want To Public License, Version 2, | ||
| 4 | # as published by Sam Hocevar. See http://www.wtfpl.net/ for more details. | ||
| 5 | |||
| 6 | from itertools import product | ||
| 7 | |||
| 8 | # The primitive instructions | ||
| 9 | OPS = { | ||
| 10 | 'ir.BitwiseAnd({}, {})' : (2, 1, lambda a,b: a&b), | ||
| 11 | 'ir.BitwiseOr({}, {})' : (2, 1, lambda a,b: a|b), | ||
| 12 | 'ir.BitwiseXor({}, {})' : (2, 1, lambda a,b: a^b), | ||
| 13 | 'ir.BitwiseNot({})' : (1, 0.1, lambda a: (~a) & 255), # Only tiny cost, as this can often be inlined into other instructions | ||
| 14 | } | ||
| 15 | |||
| 16 | # Our database of combinations of instructions | ||
| 17 | optimized_calls = {} | ||
| 18 | def cmp(lhs, rhs): | ||
| 19 | if lhs is None: # new entry | ||
| 20 | return True | ||
| 21 | if lhs[3] > rhs[3]: # costs | ||
| 22 | return True | ||
| 23 | if lhs[3] < rhs[3]: # costs | ||
| 24 | return False | ||
| 25 | if len(lhs[0]) > len(rhs[0]): # string len | ||
| 26 | return True | ||
| 27 | if len(lhs[0]) < len(rhs[0]): # string len | ||
| 28 | return False | ||
| 29 | if lhs[0] > rhs[0]: # string sorting | ||
| 30 | return True | ||
| 31 | if lhs[0] < rhs[0]: # string sorting | ||
| 32 | return False | ||
| 33 | assert lhs == rhs, "redundant instruction, bug in brute force" | ||
| 34 | return False | ||
| 35 | def register(imm, instruction, count, latency): | ||
| 36 | # Use the sum of instruction count and latency as costs to evaluate which combination is best | ||
| 37 | costs = count + latency | ||
| 38 | |||
| 39 | old = optimized_calls.get(imm, None) | ||
| 40 | new = (instruction, count, latency, costs) | ||
| 41 | |||
| 42 | # Update if new or better | ||
| 43 | if cmp(old, new): | ||
| 44 | optimized_calls[imm] = new | ||
| 45 | return True | ||
| 46 | |||
| 47 | return False | ||
| 48 | |||
| 49 | # Constants: 0 and all-ones (for free) | ||
| 50 | register(0, 'ir.Imm32(0)', 0, 0) | ||
| 51 | register(255, 'ir.Imm32(0xFFFFFFFF)', 0, 0) | ||
| 52 | |||
| 53 | # Inputs: a, b, c (for free) | ||
| 54 | ta = 0xF0 | ||
| 55 | tb = 0xCC | ||
| 56 | tc = 0xAA | ||
| 57 | inputs = { | ||
| 58 | ta : 'a', | ||
| 59 | tb : 'b', | ||
| 60 | tc : 'c', | ||
| 61 | } | ||
| 62 | for imm, instruction in inputs.items(): | ||
| 63 | register(imm, instruction, 0, 0) | ||
| 64 | register((~imm) & 255, 'ir.BitwiseNot({})'.format(instruction), 0.099, 0.099) # slightly cheaper NOT on inputs | ||
| 65 | |||
| 66 | # Try to combine one or two values from the db with an instruction. | ||
| 67 | # If it is better than the old method, update it. | ||
| 68 | while True: | ||
| 69 | registered = 0 | ||
| 70 | calls_copy = optimized_calls.copy() | ||
| 71 | for OP, (argc, cost, f) in OPS.items(): | ||
| 72 | for args in product(calls_copy.items(), repeat=argc): | ||
| 73 | # unpack (transpose) the arrays | ||
| 74 | imm = [arg[0] for arg in args] | ||
| 75 | value = [arg[1][0] for arg in args] | ||
| 76 | count = [arg[1][1] for arg in args] | ||
| 77 | latency = [arg[1][2] for arg in args] | ||
| 78 | |||
| 79 | registered += register( | ||
| 80 | f(*imm), | ||
| 81 | OP.format(*value), | ||
| 82 | sum(count) + cost, | ||
| 83 | max(latency) + cost) | ||
| 84 | if registered == 0: | ||
| 85 | # No updates at all, so terminate | ||
| 86 | break | ||
| 87 | |||
| 88 | # Hacky output. Please improve me to output valid C++ instead. | ||
| 89 | s = """ case {imm}: | ||
| 90 | return {op};""" | ||
| 91 | for imm in range(256): | ||
| 92 | print(s.format(imm=imm, op=optimized_calls[imm][0])) | ||
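The generator encodes each operand as its own 8-entry truth table (a = 0xF0, b = 0xCC, c = 0xAA), so composing them with the bitwise lambdas in OPS directly yields the LUT immediate that the printed expression realises. A standalone sanity check of that encoding, independent of the script (hypothetical snippet, not part of the tree), might look like this:

    #include <cstdint>
    #include <cstdio>

    int main() {
        // Truth-table encodings of the three inputs, as used by the generator.
        const std::uint8_t a = 0xF0, b = 0xCC, c = 0xAA;
        // (a | b) & c should reproduce LUT immediate 168, matching "case 168" above.
        const std::uint8_t computed = static_cast<std::uint8_t>((a | b) & c);
        std::printf("computed = %d (expected 168)\n", computed);
        return computed == 168 ? 0 : 1;
    }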
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 67388d980..1fc1358bc 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -53,7 +53,6 @@ void MaxwellDMA::Launch() { | |||
| 53 | 53 | ||
| 54 | // TODO(Subv): Perform more research and implement all features of this engine. | 54 | // TODO(Subv): Perform more research and implement all features of this engine. |
| 55 | const LaunchDMA& launch = regs.launch_dma; | 55 | const LaunchDMA& launch = regs.launch_dma; |
| 56 | ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE); | ||
| 57 | ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); | 56 | ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); |
| 58 | ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); | 57 | ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); |
| 59 | ASSERT(regs.dst_params.origin.x == 0); | 58 | ASSERT(regs.dst_params.origin.x == 0); |
| @@ -79,6 +78,7 @@ void MaxwellDMA::Launch() { | |||
| 79 | CopyPitchToBlockLinear(); | 78 | CopyPitchToBlockLinear(); |
| 80 | } | 79 | } |
| 81 | } | 80 | } |
| 81 | ReleaseSemaphore(); | ||
| 82 | } | 82 | } |
| 83 | 83 | ||
| 84 | void MaxwellDMA::CopyPitchToPitch() { | 84 | void MaxwellDMA::CopyPitchToPitch() { |
| @@ -244,4 +244,22 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() { | |||
| 244 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); | 244 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); |
| 245 | } | 245 | } |
| 246 | 246 | ||
| 247 | void MaxwellDMA::ReleaseSemaphore() { | ||
| 248 | const auto type = regs.launch_dma.semaphore_type; | ||
| 249 | const GPUVAddr address = regs.semaphore.address; | ||
| 250 | switch (type) { | ||
| 251 | case LaunchDMA::SemaphoreType::NONE: | ||
| 252 | break; | ||
| 253 | case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: | ||
| 254 | memory_manager.Write<u32>(address, regs.semaphore.payload); | ||
| 255 | break; | ||
| 256 | case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: | ||
| 257 | memory_manager.Write<u64>(address, static_cast<u64>(regs.semaphore.payload)); | ||
| 258 | memory_manager.Write<u64>(address + 8, system.GPU().GetTicks()); | ||
| 259 | break; | ||
| 260 | default: | ||
| 261 | UNREACHABLE_MSG("Unknown semaphore type: {}", static_cast<u32>(type.Value())); | ||
| 262 | } | ||
| 263 | } | ||
| 264 | |||
| 247 | } // namespace Tegra::Engines | 265 | } // namespace Tegra::Engines |
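For reference, the four-word release path above writes a 16-byte record: the payload zero-extended to 64 bits, followed by the GPU tick counter at offset 8. A sketch of that assumed layout, with invented field names (illustrative only):

    #include <cstdint>

    // Assumed layout written by RELEASE_FOUR_WORD_SEMAPHORE (not emulator code).
    struct FourWordSemaphore {
        std::uint64_t payload;   // regs.semaphore.payload, zero-extended
        std::uint64_t timestamp; // system.GPU().GetTicks(), written at address + 8
    };
    static_assert(sizeof(FourWordSemaphore) == 16, "four-word release writes 16 bytes");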
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index a04514425..2692cac8a 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h | |||
| @@ -224,6 +224,8 @@ private: | |||
| 224 | 224 | ||
| 225 | void FastCopyBlockLinearToPitch(); | 225 | void FastCopyBlockLinearToPitch(); |
| 226 | 226 | ||
| 227 | void ReleaseSemaphore(); | ||
| 228 | |||
| 227 | Core::System& system; | 229 | Core::System& system; |
| 228 | 230 | ||
| 229 | MemoryManager& memory_manager; | 231 | MemoryManager& memory_manager; |
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index f8495896c..9e6732abd 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | |||
| @@ -243,10 +243,6 @@ GraphicsPipeline::GraphicsPipeline( | |||
| 243 | case Settings::ShaderBackend::GLASM: | 243 | case Settings::ShaderBackend::GLASM: |
| 244 | if (!sources[stage].empty()) { | 244 | if (!sources[stage].empty()) { |
| 245 | assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage)); | 245 | assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage)); |
| 246 | if (in_parallel) { | ||
| 247 | // Make sure program is built before continuing when building in parallel | ||
| 248 | glGetString(GL_PROGRAM_ERROR_STRING_NV); | ||
| 249 | } | ||
| 250 | } | 246 | } |
| 251 | break; | 247 | break; |
| 252 | case Settings::ShaderBackend::SPIRV: | 248 | case Settings::ShaderBackend::SPIRV: |
| @@ -256,20 +252,18 @@ GraphicsPipeline::GraphicsPipeline( | |||
| 256 | break; | 252 | break; |
| 257 | } | 253 | } |
| 258 | } | 254 | } |
| 259 | if (in_parallel && backend != Settings::ShaderBackend::GLASM) { | 255 | if (in_parallel) { |
| 260 | // Make sure programs have built if we are building shaders in parallel | 256 | std::lock_guard lock{built_mutex}; |
| 261 | for (OGLProgram& program : source_programs) { | 257 | built_fence.Create(); |
| 262 | if (program.handle != 0) { | 258 | // Flush this context to ensure compilation commands and fence are in the GPU pipe. |
| 263 | GLint status{}; | 259 | glFlush(); |
| 264 | glGetProgramiv(program.handle, GL_LINK_STATUS, &status); | 260 | built_condvar.notify_one(); |
| 265 | } | 261 | } else { |
| 266 | } | 262 | is_built = true; |
| 267 | } | 263 | } |
| 268 | if (shader_notify) { | 264 | if (shader_notify) { |
| 269 | shader_notify->MarkShaderComplete(); | 265 | shader_notify->MarkShaderComplete(); |
| 270 | } | 266 | } |
| 271 | is_built = true; | ||
| 272 | built_condvar.notify_one(); | ||
| 273 | }}; | 267 | }}; |
| 274 | if (thread_worker) { | 268 | if (thread_worker) { |
| 275 | thread_worker->QueueWork(std::move(func)); | 269 | thread_worker->QueueWork(std::move(func)); |
| @@ -440,7 +434,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
| 440 | buffer_cache.UpdateGraphicsBuffers(is_indexed); | 434 | buffer_cache.UpdateGraphicsBuffers(is_indexed); |
| 441 | buffer_cache.BindHostGeometryBuffers(is_indexed); | 435 | buffer_cache.BindHostGeometryBuffers(is_indexed); |
| 442 | 436 | ||
| 443 | if (!is_built.load(std::memory_order::relaxed)) { | 437 | if (!IsBuilt()) { |
| 444 | WaitForBuild(); | 438 | WaitForBuild(); |
| 445 | } | 439 | } |
| 446 | const bool use_assembly{assembly_programs[0].handle != 0}; | 440 | const bool use_assembly{assembly_programs[0].handle != 0}; |
| @@ -585,8 +579,26 @@ void GraphicsPipeline::GenerateTransformFeedbackState() { | |||
| 585 | } | 579 | } |
| 586 | 580 | ||
| 587 | void GraphicsPipeline::WaitForBuild() { | 581 | void GraphicsPipeline::WaitForBuild() { |
| 588 | std::unique_lock lock{built_mutex}; | 582 | if (built_fence.handle == 0) { |
| 589 | built_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); | 583 | std::unique_lock lock{built_mutex}; |
| 584 | built_condvar.wait(lock, [this] { return built_fence.handle != 0; }); | ||
| 585 | } | ||
| 586 | ASSERT(glClientWaitSync(built_fence.handle, 0, GL_TIMEOUT_IGNORED) != GL_WAIT_FAILED); | ||
| 587 | is_built = true; | ||
| 588 | } | ||
| 589 | |||
| 590 | bool GraphicsPipeline::IsBuilt() noexcept { | ||
| 591 | if (is_built) { | ||
| 592 | return true; | ||
| 593 | } | ||
| 594 | if (built_fence.handle == 0) { | ||
| 595 | return false; | ||
| 596 | } | ||
| 597 | // Timeout of zero means this is non-blocking | ||
| 598 | const auto sync_status = glClientWaitSync(built_fence.handle, 0, 0); | ||
| 599 | ASSERT(sync_status != GL_WAIT_FAILED); | ||
| 600 | is_built = sync_status != GL_TIMEOUT_EXPIRED; | ||
| 601 | return is_built; | ||
| 590 | } | 602 | } |
| 591 | 603 | ||
| 592 | } // namespace OpenGL | 604 | } // namespace OpenGL |
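This change replaces the atomic flag with a GL fence: the builder thread creates the sync object and flushes so it reaches the GPU, while the render thread either blocks on it (WaitForBuild) or polls it with a zero timeout (IsBuilt). A minimal sketch of that handshake outside the pipeline class, assuming any GL loader that exposes the ARB_sync entry points (not the emulator's actual OGLSync wrapper):

    #include <glad/glad.h> // assumption: loader providing glFenceSync/glClientWaitSync

    // Builder thread: signal completion of the queued compile/link commands.
    GLsync SignalBuilt() {
        GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
        glFlush(); // push the fence (and prior commands) so other contexts can wait on it
        return fence;
    }

    // Render thread: non-blocking poll, mirroring IsBuilt() above.
    bool PollBuilt(GLsync fence) {
        const GLenum status = glClientWaitSync(fence, 0, 0); // zero timeout: returns immediately
        return status == GL_ALREADY_SIGNALED || status == GL_CONDITION_SATISFIED;
    }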
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 4e28d9a42..311d49f3f 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h | |||
| @@ -100,9 +100,7 @@ public: | |||
| 100 | return writes_global_memory; | 100 | return writes_global_memory; |
| 101 | } | 101 | } |
| 102 | 102 | ||
| 103 | [[nodiscard]] bool IsBuilt() const noexcept { | 103 | [[nodiscard]] bool IsBuilt() noexcept; |
| 104 | return is_built.load(std::memory_order::relaxed); | ||
| 105 | } | ||
| 106 | 104 | ||
| 107 | template <typename Spec> | 105 | template <typename Spec> |
| 108 | static auto MakeConfigureSpecFunc() { | 106 | static auto MakeConfigureSpecFunc() { |
| @@ -154,7 +152,8 @@ private: | |||
| 154 | 152 | ||
| 155 | std::mutex built_mutex; | 153 | std::mutex built_mutex; |
| 156 | std::condition_variable built_condvar; | 154 | std::condition_variable built_condvar; |
| 157 | std::atomic_bool is_built{false}; | 155 | OGLSync built_fence{}; |
| 156 | bool is_built{false}; | ||
| 158 | }; | 157 | }; |
| 159 | 158 | ||
| 160 | } // namespace OpenGL | 159 | } // namespace OpenGL |
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 329bf4def..2f2594585 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp | |||
| @@ -50,6 +50,7 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Cor | |||
| 50 | gpu->BindRenderer(std::move(renderer)); | 50 | gpu->BindRenderer(std::move(renderer)); |
| 51 | return gpu; | 51 | return gpu; |
| 52 | } catch (const std::runtime_error& exception) { | 52 | } catch (const std::runtime_error& exception) { |
| 53 | scope.Cancel(); | ||
| 53 | LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what()); | 54 | LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what()); |
| 54 | return nullptr; | 55 | return nullptr; |
| 55 | } | 56 | } |