diff options
| author | 2022-09-19 12:17:51 -0700 | |
|---|---|---|
| committer | 2022-09-19 12:17:51 -0700 | |
| commit | 8d4458ef24e473e57b9931d7a9d1442b51fb0b1a (patch) | |
| tree | eacbdc37ea2879612238e15cf3c47c82f297d756 | |
| parent | Merge pull request #8915 from vonchenplus/opus_multi_stream (diff) | |
| parent | astc: Enable parallel CPU astc decoding (diff) | |
| download | yuzu-8d4458ef24e473e57b9931d7a9d1442b51fb0b1a.tar.gz yuzu-8d4458ef24e473e57b9931d7a9d1442b51fb0b1a.tar.xz yuzu-8d4458ef24e473e57b9931d7a9d1442b51fb0b1a.zip | |
Merge pull request #8849 from Morph1984/parallel-astc
astc: Enable parallel CPU astc decoding
Diffstat (limited to '')
| -rw-r--r-- | src/video_core/textures/astc.cpp | 56 |
1 file changed, 35 insertions, 21 deletions
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index e3f3d3c5d..b159494c5 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp | |||
| @@ -13,7 +13,9 @@ | |||
| 13 | 13 | ||
| 14 | #include <boost/container/static_vector.hpp> | 14 | #include <boost/container/static_vector.hpp> |
| 15 | 15 | ||
| 16 | #include "common/alignment.h" | ||
| 16 | #include "common/common_types.h" | 17 | #include "common/common_types.h" |
| 18 | #include "common/thread_worker.h" | ||
| 17 | #include "video_core/textures/astc.h" | 19 | #include "video_core/textures/astc.h" |
| 18 | 20 | ||
| 19 | class InputBitStream { | 21 | class InputBitStream { |
| @@ -1650,29 +1652,41 @@ static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth, | |||
| 1650 | 1652 | ||
| 1651 | void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, | 1653 | void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, |
| 1652 | uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) { | 1654 | uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) { |
| 1653 | u32 block_index = 0; | 1655 | const u32 rows = Common::DivideUp(height, block_height); |
| 1654 | std::size_t depth_offset = 0; | 1656 | const u32 cols = Common::DivideUp(width, block_width); |
| 1655 | for (u32 z = 0; z < depth; z++) { | 1657 | |
| 1656 | for (u32 y = 0; y < height; y += block_height) { | 1658 | Common::ThreadWorker workers{std::max(std::thread::hardware_concurrency(), 2U) / 2, |
| 1657 | for (u32 x = 0; x < width; x += block_width) { | 1659 | "yuzu:ASTCDecompress"}; |
| 1658 | const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)}; | 1660 | |
| 1659 | 1661 | for (u32 z = 0; z < depth; ++z) { | |
| 1660 | // Blocks can be at most 12x12 | 1662 | const u32 depth_offset = z * height * width * 4; |
| 1661 | std::array<u32, 12 * 12> uncompData; | 1663 | for (u32 y_index = 0; y_index < rows; ++y_index) { |
| 1662 | DecompressBlock(blockPtr, block_width, block_height, uncompData); | 1664 | auto decompress_stride = [data, width, height, depth, block_width, block_height, output, |
| 1663 | 1665 | rows, cols, z, depth_offset, y_index] { | |
| 1664 | u32 decompWidth = std::min(block_width, width - x); | 1666 | const u32 y = y_index * block_height; |
| 1665 | u32 decompHeight = std::min(block_height, height - y); | 1667 | for (u32 x_index = 0; x_index < cols; ++x_index) { |
| 1666 | 1668 | const u32 block_index = (z * rows * cols) + (y_index * cols) + x_index; | |
| 1667 | const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4); | 1669 | const u32 x = x_index * block_width; |
| 1668 | for (u32 jj = 0; jj < decompHeight; jj++) { | 1670 | |
| 1669 | std::memcpy(outRow.data() + jj * width * 4, | 1671 | const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)}; |
| 1670 | uncompData.data() + jj * block_width, decompWidth * 4); | 1672 | |
| 1673 | // Blocks can be at most 12x12 | ||
| 1674 | std::array<u32, 12 * 12> uncompData; | ||
| 1675 | DecompressBlock(blockPtr, block_width, block_height, uncompData); | ||
| 1676 | |||
| 1677 | u32 decompWidth = std::min(block_width, width - x); | ||
| 1678 | u32 decompHeight = std::min(block_height, height - y); | ||
| 1679 | |||
| 1680 | const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4); | ||
| 1681 | for (u32 h = 0; h < decompHeight; ++h) { | ||
| 1682 | std::memcpy(outRow.data() + h * width * 4, | ||
| 1683 | uncompData.data() + h * block_width, decompWidth * 4); | ||
| 1684 | } | ||
| 1671 | } | 1685 | } |
| 1672 | ++block_index; | 1686 | }; |
| 1673 | } | 1687 | workers.QueueWork(std::move(decompress_stride)); |
| 1674 | } | 1688 | } |
| 1675 | depth_offset += height * width * 4; | 1689 | workers.WaitForRequests(); |
| 1676 | } | 1690 | } |
| 1677 | } | 1691 | } |
| 1678 | 1692 | ||