diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/common/alignment.h | 29 | ||||
| -rw-r--r-- | src/common/bit_util.h | 76 | ||||
| -rw-r--r-- | src/core/hle/kernel/k_priority_queue.h | 4 | ||||
| -rw-r--r-- | src/core/hle/kernel/k_scheduler.cpp | 8 | ||||
| -rw-r--r-- | src/core/hle/kernel/memory/page_heap.h | 4 | ||||
| -rw-r--r-- | src/core/hle/kernel/process_capability.cpp | 4 | ||||
| -rw-r--r-- | src/tests/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/tests/common/bit_utils.cpp | 23 | ||||
| -rw-r--r-- | src/video_core/cdma_pusher.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/cdma_pusher.h | 2 | ||||
| -rw-r--r-- | src/video_core/command_classes/codecs/h264.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/texture_cache/accelerated_swizzle.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/texture_cache/util.cpp | 10 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 8 |
14 files changed, 40 insertions, 141 deletions
diff --git a/src/common/alignment.h b/src/common/alignment.h index 5040043de..fb81f10d8 100644 --- a/src/common/alignment.h +++ b/src/common/alignment.h | |||
| @@ -9,50 +9,45 @@ | |||
| 9 | namespace Common { | 9 | namespace Common { |
| 10 | 10 | ||
| 11 | template <typename T> | 11 | template <typename T> |
| 12 | [[nodiscard]] constexpr T AlignUp(T value, std::size_t size) { | 12 | requires std::is_unsigned_v<T>[[nodiscard]] constexpr T AlignUp(T value, size_t size) { |
| 13 | static_assert(std::is_unsigned_v<T>, "T must be an unsigned value."); | ||
| 14 | auto mod{static_cast<T>(value % size)}; | 13 | auto mod{static_cast<T>(value % size)}; |
| 15 | value -= mod; | 14 | value -= mod; |
| 16 | return static_cast<T>(mod == T{0} ? value : value + size); | 15 | return static_cast<T>(mod == T{0} ? value : value + size); |
| 17 | } | 16 | } |
| 18 | 17 | ||
| 19 | template <typename T> | 18 | template <typename T> |
| 20 | [[nodiscard]] constexpr T AlignDown(T value, std::size_t size) { | 19 | requires std::is_unsigned_v<T>[[nodiscard]] constexpr T AlignUpLog2(T value, size_t align_log2) { |
| 21 | static_assert(std::is_unsigned_v<T>, "T must be an unsigned value."); | 20 | return static_cast<T>((value + ((1ULL << align_log2) - 1)) >> align_log2 << align_log2); |
| 22 | return static_cast<T>(value - value % size); | ||
| 23 | } | 21 | } |
| 24 | 22 | ||
| 25 | template <typename T> | 23 | template <typename T> |
| 26 | [[nodiscard]] constexpr T AlignBits(T value, std::size_t align) { | 24 | requires std::is_unsigned_v<T>[[nodiscard]] constexpr T AlignDown(T value, size_t size) { |
| 27 | static_assert(std::is_unsigned_v<T>, "T must be an unsigned value."); | 25 | return static_cast<T>(value - value % size); |
| 28 | return static_cast<T>((value + ((1ULL << align) - 1)) >> align << align); | ||
| 29 | } | 26 | } |
| 30 | 27 | ||
| 31 | template <typename T> | 28 | template <typename T> |
| 32 | [[nodiscard]] constexpr bool Is4KBAligned(T value) { | 29 | requires std::is_unsigned_v<T>[[nodiscard]] constexpr bool Is4KBAligned(T value) { |
| 33 | static_assert(std::is_unsigned_v<T>, "T must be an unsigned value."); | ||
| 34 | return (value & 0xFFF) == 0; | 30 | return (value & 0xFFF) == 0; |
| 35 | } | 31 | } |
| 36 | 32 | ||
| 37 | template <typename T> | 33 | template <typename T> |
| 38 | [[nodiscard]] constexpr bool IsWordAligned(T value) { | 34 | requires std::is_unsigned_v<T>[[nodiscard]] constexpr bool IsWordAligned(T value) { |
| 39 | static_assert(std::is_unsigned_v<T>, "T must be an unsigned value."); | ||
| 40 | return (value & 0b11) == 0; | 35 | return (value & 0b11) == 0; |
| 41 | } | 36 | } |
| 42 | 37 | ||
| 43 | template <typename T> | 38 | template <typename T> |
| 44 | [[nodiscard]] constexpr bool IsAligned(T value, std::size_t alignment) { | 39 | requires std::is_integral_v<T>[[nodiscard]] constexpr bool IsAligned(T value, size_t alignment) { |
| 45 | using U = typename std::make_unsigned<T>::type; | 40 | using U = typename std::make_unsigned_t<T>; |
| 46 | const U mask = static_cast<U>(alignment - 1); | 41 | const U mask = static_cast<U>(alignment - 1); |
| 47 | return (value & mask) == 0; | 42 | return (value & mask) == 0; |
| 48 | } | 43 | } |
| 49 | 44 | ||
| 50 | template <typename T, std::size_t Align = 16> | 45 | template <typename T, size_t Align = 16> |
| 51 | class AlignmentAllocator { | 46 | class AlignmentAllocator { |
| 52 | public: | 47 | public: |
| 53 | using value_type = T; | 48 | using value_type = T; |
| 54 | using size_type = std::size_t; | 49 | using size_type = size_t; |
| 55 | using difference_type = std::ptrdiff_t; | 50 | using difference_type = ptrdiff_t; |
| 56 | 51 | ||
| 57 | using propagate_on_container_copy_assignment = std::true_type; | 52 | using propagate_on_container_copy_assignment = std::true_type; |
| 58 | using propagate_on_container_move_assignment = std::true_type; | 53 | using propagate_on_container_move_assignment = std::true_type; |
diff --git a/src/common/bit_util.h b/src/common/bit_util.h index 29f59a9a3..685e7fc9b 100644 --- a/src/common/bit_util.h +++ b/src/common/bit_util.h | |||
| @@ -22,82 +22,6 @@ template <typename T> | |||
| 22 | } | 22 | } |
| 23 | 23 | ||
| 24 | #ifdef _MSC_VER | 24 | #ifdef _MSC_VER |
| 25 | [[nodiscard]] inline u32 CountLeadingZeroes32(u32 value) { | ||
| 26 | unsigned long leading_zero = 0; | ||
| 27 | |||
| 28 | if (_BitScanReverse(&leading_zero, value) != 0) { | ||
| 29 | return 31 - leading_zero; | ||
| 30 | } | ||
| 31 | |||
| 32 | return 32; | ||
| 33 | } | ||
| 34 | |||
| 35 | [[nodiscard]] inline u32 CountLeadingZeroes64(u64 value) { | ||
| 36 | unsigned long leading_zero = 0; | ||
| 37 | |||
| 38 | if (_BitScanReverse64(&leading_zero, value) != 0) { | ||
| 39 | return 63 - leading_zero; | ||
| 40 | } | ||
| 41 | |||
| 42 | return 64; | ||
| 43 | } | ||
| 44 | #else | ||
| 45 | [[nodiscard]] inline u32 CountLeadingZeroes32(u32 value) { | ||
| 46 | if (value == 0) { | ||
| 47 | return 32; | ||
| 48 | } | ||
| 49 | |||
| 50 | return static_cast<u32>(__builtin_clz(value)); | ||
| 51 | } | ||
| 52 | |||
| 53 | [[nodiscard]] inline u32 CountLeadingZeroes64(u64 value) { | ||
| 54 | if (value == 0) { | ||
| 55 | return 64; | ||
| 56 | } | ||
| 57 | |||
| 58 | return static_cast<u32>(__builtin_clzll(value)); | ||
| 59 | } | ||
| 60 | #endif | ||
| 61 | |||
| 62 | #ifdef _MSC_VER | ||
| 63 | [[nodiscard]] inline u32 CountTrailingZeroes32(u32 value) { | ||
| 64 | unsigned long trailing_zero = 0; | ||
| 65 | |||
| 66 | if (_BitScanForward(&trailing_zero, value) != 0) { | ||
| 67 | return trailing_zero; | ||
| 68 | } | ||
| 69 | |||
| 70 | return 32; | ||
| 71 | } | ||
| 72 | |||
| 73 | [[nodiscard]] inline u32 CountTrailingZeroes64(u64 value) { | ||
| 74 | unsigned long trailing_zero = 0; | ||
| 75 | |||
| 76 | if (_BitScanForward64(&trailing_zero, value) != 0) { | ||
| 77 | return trailing_zero; | ||
| 78 | } | ||
| 79 | |||
| 80 | return 64; | ||
| 81 | } | ||
| 82 | #else | ||
| 83 | [[nodiscard]] inline u32 CountTrailingZeroes32(u32 value) { | ||
| 84 | if (value == 0) { | ||
| 85 | return 32; | ||
| 86 | } | ||
| 87 | |||
| 88 | return static_cast<u32>(__builtin_ctz(value)); | ||
| 89 | } | ||
| 90 | |||
| 91 | [[nodiscard]] inline u32 CountTrailingZeroes64(u64 value) { | ||
| 92 | if (value == 0) { | ||
| 93 | return 64; | ||
| 94 | } | ||
| 95 | |||
| 96 | return static_cast<u32>(__builtin_ctzll(value)); | ||
| 97 | } | ||
| 98 | #endif | ||
| 99 | |||
| 100 | #ifdef _MSC_VER | ||
| 101 | 25 | ||
| 102 | [[nodiscard]] inline u32 MostSignificantBit32(const u32 value) { | 26 | [[nodiscard]] inline u32 MostSignificantBit32(const u32 value) { |
| 103 | unsigned long result; | 27 | unsigned long result; |
diff --git a/src/core/hle/kernel/k_priority_queue.h b/src/core/hle/kernel/k_priority_queue.h index 99fb8fe93..0dc929040 100644 --- a/src/core/hle/kernel/k_priority_queue.h +++ b/src/core/hle/kernel/k_priority_queue.h | |||
| @@ -8,11 +8,11 @@ | |||
| 8 | #pragma once | 8 | #pragma once |
| 9 | 9 | ||
| 10 | #include <array> | 10 | #include <array> |
| 11 | #include <bit> | ||
| 11 | #include <concepts> | 12 | #include <concepts> |
| 12 | 13 | ||
| 13 | #include "common/assert.h" | 14 | #include "common/assert.h" |
| 14 | #include "common/bit_set.h" | 15 | #include "common/bit_set.h" |
| 15 | #include "common/bit_util.h" | ||
| 16 | #include "common/common_types.h" | 16 | #include "common/common_types.h" |
| 17 | #include "common/concepts.h" | 17 | #include "common/concepts.h" |
| 18 | 18 | ||
| @@ -268,7 +268,7 @@ private: | |||
| 268 | } | 268 | } |
| 269 | 269 | ||
| 270 | constexpr s32 GetNextCore(u64& affinity) { | 270 | constexpr s32 GetNextCore(u64& affinity) { |
| 271 | const s32 core = Common::CountTrailingZeroes64(affinity); | 271 | const s32 core = std::countr_zero(affinity); |
| 272 | ClearAffinityBit(affinity, core); | 272 | ClearAffinityBit(affinity, core); |
| 273 | return core; | 273 | return core; |
| 274 | } | 274 | } |
diff --git a/src/core/hle/kernel/k_scheduler.cpp b/src/core/hle/kernel/k_scheduler.cpp index 42f0ea483..12b5619fb 100644 --- a/src/core/hle/kernel/k_scheduler.cpp +++ b/src/core/hle/kernel/k_scheduler.cpp | |||
| @@ -5,6 +5,8 @@ | |||
| 5 | // This file references various implementation details from Atmosphere, an open-source firmware for | 5 | // This file references various implementation details from Atmosphere, an open-source firmware for |
| 6 | // the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX. | 6 | // the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX. |
| 7 | 7 | ||
| 8 | #include <bit> | ||
| 9 | |||
| 8 | #include "common/assert.h" | 10 | #include "common/assert.h" |
| 9 | #include "common/bit_util.h" | 11 | #include "common/bit_util.h" |
| 10 | #include "common/fiber.h" | 12 | #include "common/fiber.h" |
| @@ -31,12 +33,12 @@ static void IncrementScheduledCount(Kernel::Thread* thread) { | |||
| 31 | 33 | ||
| 32 | void KScheduler::RescheduleCores(KernelCore& kernel, u64 cores_pending_reschedule, | 34 | void KScheduler::RescheduleCores(KernelCore& kernel, u64 cores_pending_reschedule, |
| 33 | Core::EmuThreadHandle global_thread) { | 35 | Core::EmuThreadHandle global_thread) { |
| 34 | u32 current_core = global_thread.host_handle; | 36 | const u32 current_core = global_thread.host_handle; |
| 35 | bool must_context_switch = global_thread.guest_handle != InvalidHandle && | 37 | bool must_context_switch = global_thread.guest_handle != InvalidHandle && |
| 36 | (current_core < Core::Hardware::NUM_CPU_CORES); | 38 | (current_core < Core::Hardware::NUM_CPU_CORES); |
| 37 | 39 | ||
| 38 | while (cores_pending_reschedule != 0) { | 40 | while (cores_pending_reschedule != 0) { |
| 39 | u32 core = Common::CountTrailingZeroes64(cores_pending_reschedule); | 41 | const auto core = static_cast<u32>(std::countr_zero(cores_pending_reschedule)); |
| 40 | ASSERT(core < Core::Hardware::NUM_CPU_CORES); | 42 | ASSERT(core < Core::Hardware::NUM_CPU_CORES); |
| 41 | if (!must_context_switch || core != current_core) { | 43 | if (!must_context_switch || core != current_core) { |
| 42 | auto& phys_core = kernel.PhysicalCore(core); | 44 | auto& phys_core = kernel.PhysicalCore(core); |
| @@ -109,7 +111,7 @@ u64 KScheduler::UpdateHighestPriorityThreadsImpl(KernelCore& kernel) { | |||
| 109 | 111 | ||
| 110 | // Idle cores are bad. We're going to try to migrate threads to each idle core in turn. | 112 | // Idle cores are bad. We're going to try to migrate threads to each idle core in turn. |
| 111 | while (idle_cores != 0) { | 113 | while (idle_cores != 0) { |
| 112 | u32 core_id = Common::CountTrailingZeroes64(idle_cores); | 114 | const auto core_id = static_cast<u32>(std::countr_zero(idle_cores)); |
| 113 | if (Thread* suggested = priority_queue.GetSuggestedFront(core_id); suggested != nullptr) { | 115 | if (Thread* suggested = priority_queue.GetSuggestedFront(core_id); suggested != nullptr) { |
| 114 | s32 migration_candidates[Core::Hardware::NUM_CPU_CORES]; | 116 | s32 migration_candidates[Core::Hardware::NUM_CPU_CORES]; |
| 115 | size_t num_candidates = 0; | 117 | size_t num_candidates = 0; |
diff --git a/src/core/hle/kernel/memory/page_heap.h b/src/core/hle/kernel/memory/page_heap.h index 22b0de860..131093284 100644 --- a/src/core/hle/kernel/memory/page_heap.h +++ b/src/core/hle/kernel/memory/page_heap.h | |||
| @@ -8,11 +8,11 @@ | |||
| 8 | #pragma once | 8 | #pragma once |
| 9 | 9 | ||
| 10 | #include <array> | 10 | #include <array> |
| 11 | #include <bit> | ||
| 11 | #include <vector> | 12 | #include <vector> |
| 12 | 13 | ||
| 13 | #include "common/alignment.h" | 14 | #include "common/alignment.h" |
| 14 | #include "common/assert.h" | 15 | #include "common/assert.h" |
| 15 | #include "common/bit_util.h" | ||
| 16 | #include "common/common_funcs.h" | 16 | #include "common/common_funcs.h" |
| 17 | #include "common/common_types.h" | 17 | #include "common/common_types.h" |
| 18 | #include "core/hle/kernel/memory/memory_types.h" | 18 | #include "core/hle/kernel/memory/memory_types.h" |
| @@ -105,7 +105,7 @@ private: | |||
| 105 | ASSERT(depth == 0); | 105 | ASSERT(depth == 0); |
| 106 | return -1; | 106 | return -1; |
| 107 | } | 107 | } |
| 108 | offset = offset * 64 + Common::CountTrailingZeroes64(v); | 108 | offset = offset * 64 + static_cast<u32>(std::countr_zero(v)); |
| 109 | ++depth; | 109 | ++depth; |
| 110 | } while (depth < static_cast<s32>(used_depths)); | 110 | } while (depth < static_cast<s32>(used_depths)); |
| 111 | 111 | ||
diff --git a/src/core/hle/kernel/process_capability.cpp b/src/core/hle/kernel/process_capability.cpp index 0f128c586..0566311b6 100644 --- a/src/core/hle/kernel/process_capability.cpp +++ b/src/core/hle/kernel/process_capability.cpp | |||
| @@ -2,6 +2,8 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <bit> | ||
| 6 | |||
| 5 | #include "common/bit_util.h" | 7 | #include "common/bit_util.h" |
| 6 | #include "common/logging/log.h" | 8 | #include "common/logging/log.h" |
| 7 | #include "core/hle/kernel/errors.h" | 9 | #include "core/hle/kernel/errors.h" |
| @@ -60,7 +62,7 @@ constexpr CapabilityType GetCapabilityType(u32 value) { | |||
| 60 | 62 | ||
| 61 | u32 GetFlagBitOffset(CapabilityType type) { | 63 | u32 GetFlagBitOffset(CapabilityType type) { |
| 62 | const auto value = static_cast<u32>(type); | 64 | const auto value = static_cast<u32>(type); |
| 63 | return static_cast<u32>(Common::BitSize<u32>() - Common::CountLeadingZeroes32(value)); | 65 | return static_cast<u32>(Common::BitSize<u32>() - static_cast<u32>(std::countl_zero(value))); |
| 64 | } | 66 | } |
| 65 | 67 | ||
| 66 | } // Anonymous namespace | 68 | } // Anonymous namespace |
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 8a606b448..33fa89583 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt | |||
| @@ -1,6 +1,5 @@ | |||
| 1 | add_executable(tests | 1 | add_executable(tests |
| 2 | common/bit_field.cpp | 2 | common/bit_field.cpp |
| 3 | common/bit_utils.cpp | ||
| 4 | common/fibers.cpp | 3 | common/fibers.cpp |
| 5 | common/param_package.cpp | 4 | common/param_package.cpp |
| 6 | common/ring_buffer.cpp | 5 | common/ring_buffer.cpp |
diff --git a/src/tests/common/bit_utils.cpp b/src/tests/common/bit_utils.cpp deleted file mode 100644 index 479b5995a..000000000 --- a/src/tests/common/bit_utils.cpp +++ /dev/null | |||
| @@ -1,23 +0,0 @@ | |||
| 1 | // Copyright 2017 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <catch2/catch.hpp> | ||
| 6 | #include <math.h> | ||
| 7 | #include "common/bit_util.h" | ||
| 8 | |||
| 9 | namespace Common { | ||
| 10 | |||
| 11 | TEST_CASE("BitUtils::CountTrailingZeroes", "[common]") { | ||
| 12 | REQUIRE(Common::CountTrailingZeroes32(0) == 32); | ||
| 13 | REQUIRE(Common::CountTrailingZeroes64(0) == 64); | ||
| 14 | REQUIRE(Common::CountTrailingZeroes32(9) == 0); | ||
| 15 | REQUIRE(Common::CountTrailingZeroes32(8) == 3); | ||
| 16 | REQUIRE(Common::CountTrailingZeroes32(0x801000) == 12); | ||
| 17 | REQUIRE(Common::CountTrailingZeroes64(9) == 0); | ||
| 18 | REQUIRE(Common::CountTrailingZeroes64(8) == 3); | ||
| 19 | REQUIRE(Common::CountTrailingZeroes64(0x801000) == 12); | ||
| 20 | REQUIRE(Common::CountTrailingZeroes64(0x801000000000UL) == 36); | ||
| 21 | } | ||
| 22 | |||
| 23 | } // namespace Common | ||
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index 94679d5d1..33b3c060b 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp | |||
| @@ -18,10 +18,10 @@ | |||
| 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| 19 | // | 19 | // |
| 20 | 20 | ||
| 21 | #include <bit> | ||
| 21 | #include "command_classes/host1x.h" | 22 | #include "command_classes/host1x.h" |
| 22 | #include "command_classes/nvdec.h" | 23 | #include "command_classes/nvdec.h" |
| 23 | #include "command_classes/vic.h" | 24 | #include "command_classes/vic.h" |
| 24 | #include "common/bit_util.h" | ||
| 25 | #include "video_core/cdma_pusher.h" | 25 | #include "video_core/cdma_pusher.h" |
| 26 | #include "video_core/command_classes/nvdec_common.h" | 26 | #include "video_core/command_classes/nvdec_common.h" |
| 27 | #include "video_core/engines/maxwell_3d.h" | 27 | #include "video_core/engines/maxwell_3d.h" |
| @@ -56,7 +56,7 @@ void CDmaPusher::Step() { | |||
| 56 | 56 | ||
| 57 | for (const u32 value : values) { | 57 | for (const u32 value : values) { |
| 58 | if (mask != 0) { | 58 | if (mask != 0) { |
| 59 | const u32 lbs = Common::CountTrailingZeroes32(mask); | 59 | const auto lbs = static_cast<u32>(std::countr_zero(mask)); |
| 60 | mask &= ~(1U << lbs); | 60 | mask &= ~(1U << lbs); |
| 61 | ExecuteCommand(static_cast<u32>(offset + lbs), value); | 61 | ExecuteCommand(static_cast<u32>(offset + lbs), value); |
| 62 | continue; | 62 | continue; |
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h index 8ca70b6dd..e5f212c1a 100644 --- a/src/video_core/cdma_pusher.h +++ b/src/video_core/cdma_pusher.h | |||
| @@ -126,7 +126,7 @@ private: | |||
| 126 | 126 | ||
| 127 | s32 count{}; | 127 | s32 count{}; |
| 128 | s32 offset{}; | 128 | s32 offset{}; |
| 129 | s32 mask{}; | 129 | u32 mask{}; |
| 130 | bool incrementing{}; | 130 | bool incrementing{}; |
| 131 | 131 | ||
| 132 | // Queue of command lists to be processed | 132 | // Queue of command lists to be processed |
diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp index 65bbeac78..fea6aed98 100644 --- a/src/video_core/command_classes/codecs/h264.cpp +++ b/src/video_core/command_classes/codecs/h264.cpp | |||
| @@ -19,7 +19,7 @@ | |||
| 19 | // | 19 | // |
| 20 | 20 | ||
| 21 | #include <array> | 21 | #include <array> |
| 22 | #include "common/bit_util.h" | 22 | #include <bit> |
| 23 | #include "video_core/command_classes/codecs/h264.h" | 23 | #include "video_core/command_classes/codecs/h264.h" |
| 24 | #include "video_core/gpu.h" | 24 | #include "video_core/gpu.h" |
| 25 | #include "video_core/memory_manager.h" | 25 | #include "video_core/memory_manager.h" |
| @@ -266,7 +266,7 @@ void H264BitWriter::WriteExpGolombCodedInt(s32 value) { | |||
| 266 | } | 266 | } |
| 267 | 267 | ||
| 268 | void H264BitWriter::WriteExpGolombCodedUInt(u32 value) { | 268 | void H264BitWriter::WriteExpGolombCodedUInt(u32 value) { |
| 269 | const s32 size = 32 - Common::CountLeadingZeroes32(static_cast<s32>(value + 1)); | 269 | const s32 size = 32 - std::countl_zero(value + 1); |
| 270 | WriteBits(1, size); | 270 | WriteBits(1, size); |
| 271 | 271 | ||
| 272 | value -= (1U << (size - 1)) - 1; | 272 | value -= (1U << (size - 1)) - 1; |
diff --git a/src/video_core/texture_cache/accelerated_swizzle.cpp b/src/video_core/texture_cache/accelerated_swizzle.cpp index a4fc1184b..15585caeb 100644 --- a/src/video_core/texture_cache/accelerated_swizzle.cpp +++ b/src/video_core/texture_cache/accelerated_swizzle.cpp | |||
| @@ -27,7 +27,7 @@ BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(const SwizzleParameter | |||
| 27 | const Extent3D num_tiles = swizzle.num_tiles; | 27 | const Extent3D num_tiles = swizzle.num_tiles; |
| 28 | const u32 bytes_per_block = BytesPerBlock(info.format); | 28 | const u32 bytes_per_block = BytesPerBlock(info.format); |
| 29 | const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level); | 29 | const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level); |
| 30 | const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; | 30 | const u32 stride = Common::AlignUpLog2(num_tiles.width, stride_alignment) * bytes_per_block; |
| 31 | const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); | 31 | const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); |
| 32 | return BlockLinearSwizzle2DParams{ | 32 | return BlockLinearSwizzle2DParams{ |
| 33 | .origin{0, 0, 0}, | 33 | .origin{0, 0, 0}, |
| @@ -47,7 +47,7 @@ BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(const SwizzleParameter | |||
| 47 | const Extent3D num_tiles = swizzle.num_tiles; | 47 | const Extent3D num_tiles = swizzle.num_tiles; |
| 48 | const u32 bytes_per_block = BytesPerBlock(info.format); | 48 | const u32 bytes_per_block = BytesPerBlock(info.format); |
| 49 | const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level); | 49 | const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level); |
| 50 | const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; | 50 | const u32 stride = Common::AlignUpLog2(num_tiles.width, stride_alignment) * bytes_per_block; |
| 51 | 51 | ||
| 52 | const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT; | 52 | const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT; |
| 53 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth); | 53 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth); |
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 279932778..ce8fcfe0a 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp | |||
| @@ -279,7 +279,7 @@ template <u32 GOB_EXTENT> | |||
| 279 | const bool is_small = IsSmallerThanGobSize(blocks, gob, info.block.depth); | 279 | const bool is_small = IsSmallerThanGobSize(blocks, gob, info.block.depth); |
| 280 | const u32 alignment = is_small ? 0 : info.tile_width_spacing; | 280 | const u32 alignment = is_small ? 0 : info.tile_width_spacing; |
| 281 | return Extent2D{ | 281 | return Extent2D{ |
| 282 | .width = Common::AlignBits(gobs.width, alignment), | 282 | .width = Common::AlignUpLog2(gobs.width, alignment), |
| 283 | .height = gobs.height, | 283 | .height = gobs.height, |
| 284 | }; | 284 | }; |
| 285 | } | 285 | } |
| @@ -352,7 +352,7 @@ template <u32 GOB_EXTENT> | |||
| 352 | // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L134 | 352 | // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L134 |
| 353 | if (tile_width_spacing > 0) { | 353 | if (tile_width_spacing > 0) { |
| 354 | const u32 alignment_log2 = GOB_SIZE_SHIFT + tile_width_spacing + block.height + block.depth; | 354 | const u32 alignment_log2 = GOB_SIZE_SHIFT + tile_width_spacing + block.height + block.depth; |
| 355 | return Common::AlignBits(size_bytes, alignment_log2); | 355 | return Common::AlignUpLog2(size_bytes, alignment_log2); |
| 356 | } | 356 | } |
| 357 | const u32 aligned_height = Common::AlignUp(size.height, tile_size_y); | 357 | const u32 aligned_height = Common::AlignUp(size.height, tile_size_y); |
| 358 | while (block.height != 0 && aligned_height <= (1U << (block.height - 1)) * GOB_SIZE_Y) { | 358 | while (block.height != 0 && aligned_height <= (1U << (block.height - 1)) * GOB_SIZE_Y) { |
| @@ -528,9 +528,9 @@ template <u32 GOB_EXTENT> | |||
| 528 | const u32 alignment = StrideAlignment(num_tiles, info.block, bpp_log2, info.tile_width_spacing); | 528 | const u32 alignment = StrideAlignment(num_tiles, info.block, bpp_log2, info.tile_width_spacing); |
| 529 | const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0); | 529 | const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0); |
| 530 | return Extent3D{ | 530 | return Extent3D{ |
| 531 | .width = Common::AlignBits(num_tiles.width, alignment), | 531 | .width = Common::AlignUpLog2(num_tiles.width, alignment), |
| 532 | .height = Common::AlignBits(num_tiles.height, GOB_SIZE_Y_SHIFT + mip_block.height), | 532 | .height = Common::AlignUpLog2(num_tiles.height, GOB_SIZE_Y_SHIFT + mip_block.height), |
| 533 | .depth = Common::AlignBits(num_tiles.depth, GOB_SIZE_Z_SHIFT + mip_block.depth), | 533 | .depth = Common::AlignUpLog2(num_tiles.depth, GOB_SIZE_Z_SHIFT + mip_block.depth), |
| 534 | }; | 534 | }; |
| 535 | } | 535 | } |
| 536 | 536 | ||
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 9f5181318..62685a183 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -49,7 +49,7 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe | |||
| 49 | // We can configure here a custom pitch | 49 | // We can configure here a custom pitch |
| 50 | // As it's not exposed 'width * bpp' will be the expected pitch. | 50 | // As it's not exposed 'width * bpp' will be the expected pitch. |
| 51 | const u32 pitch = width * bytes_per_pixel; | 51 | const u32 pitch = width * bytes_per_pixel; |
| 52 | const u32 stride = Common::AlignBits(width, stride_alignment) * bytes_per_pixel; | 52 | const u32 stride = Common::AlignUpLog2(width, stride_alignment) * bytes_per_pixel; |
| 53 | 53 | ||
| 54 | const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); | 54 | const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); |
| 55 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); | 55 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); |
| @@ -217,9 +217,9 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 | |||
| 217 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 217 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
| 218 | u32 block_height, u32 block_depth) { | 218 | u32 block_height, u32 block_depth) { |
| 219 | if (tiled) { | 219 | if (tiled) { |
| 220 | const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, GOB_SIZE_X_SHIFT); | 220 | const u32 aligned_width = Common::AlignUpLog2(width * bytes_per_pixel, GOB_SIZE_X_SHIFT); |
| 221 | const u32 aligned_height = Common::AlignBits(height, GOB_SIZE_Y_SHIFT + block_height); | 221 | const u32 aligned_height = Common::AlignUpLog2(height, GOB_SIZE_Y_SHIFT + block_height); |
| 222 | const u32 aligned_depth = Common::AlignBits(depth, GOB_SIZE_Z_SHIFT + block_depth); | 222 | const u32 aligned_depth = Common::AlignUpLog2(depth, GOB_SIZE_Z_SHIFT + block_depth); |
| 223 | return aligned_width * aligned_height * aligned_depth; | 223 | return aligned_width * aligned_height * aligned_depth; |
| 224 | } else { | 224 | } else { |
| 225 | return width * height * depth * bytes_per_pixel; | 225 | return width * height * depth * bytes_per_pixel; |