summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/video_core/textures/astc.cpp56
1 files changed, 35 insertions, 21 deletions
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index e3f3d3c5d..b159494c5 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -13,7 +13,9 @@
13 13
14#include <boost/container/static_vector.hpp> 14#include <boost/container/static_vector.hpp>
15 15
16#include "common/alignment.h"
16#include "common/common_types.h" 17#include "common/common_types.h"
18#include "common/thread_worker.h"
17#include "video_core/textures/astc.h" 19#include "video_core/textures/astc.h"
18 20
19class InputBitStream { 21class InputBitStream {
@@ -1650,29 +1652,41 @@ static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
1650 1652
1651void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, 1653void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
1652 uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) { 1654 uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) {
1653 u32 block_index = 0; 1655 const u32 rows = Common::DivideUp(height, block_height);
1654 std::size_t depth_offset = 0; 1656 const u32 cols = Common::DivideUp(width, block_width);
1655 for (u32 z = 0; z < depth; z++) { 1657
1656 for (u32 y = 0; y < height; y += block_height) { 1658 Common::ThreadWorker workers{std::max(std::thread::hardware_concurrency(), 2U) / 2,
1657 for (u32 x = 0; x < width; x += block_width) { 1659 "yuzu:ASTCDecompress"};
1658 const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)}; 1660
1659 1661 for (u32 z = 0; z < depth; ++z) {
1660 // Blocks can be at most 12x12 1662 const u32 depth_offset = z * height * width * 4;
1661 std::array<u32, 12 * 12> uncompData; 1663 for (u32 y_index = 0; y_index < rows; ++y_index) {
1662 DecompressBlock(blockPtr, block_width, block_height, uncompData); 1664 auto decompress_stride = [data, width, height, depth, block_width, block_height, output,
1663 1665 rows, cols, z, depth_offset, y_index] {
1664 u32 decompWidth = std::min(block_width, width - x); 1666 const u32 y = y_index * block_height;
1665 u32 decompHeight = std::min(block_height, height - y); 1667 for (u32 x_index = 0; x_index < cols; ++x_index) {
1666 1668 const u32 block_index = (z * rows * cols) + (y_index * cols) + x_index;
1667 const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4); 1669 const u32 x = x_index * block_width;
1668 for (u32 jj = 0; jj < decompHeight; jj++) { 1670
1669 std::memcpy(outRow.data() + jj * width * 4, 1671 const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)};
1670 uncompData.data() + jj * block_width, decompWidth * 4); 1672
1673 // Blocks can be at most 12x12
1674 std::array<u32, 12 * 12> uncompData;
1675 DecompressBlock(blockPtr, block_width, block_height, uncompData);
1676
1677 u32 decompWidth = std::min(block_width, width - x);
1678 u32 decompHeight = std::min(block_height, height - y);
1679
1680 const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4);
1681 for (u32 h = 0; h < decompHeight; ++h) {
1682 std::memcpy(outRow.data() + h * width * 4,
1683 uncompData.data() + h * block_width, decompWidth * 4);
1684 }
1671 } 1685 }
1672 ++block_index; 1686 };
1673 } 1687 workers.QueueWork(std::move(decompress_stride));
1674 } 1688 }
1675 depth_offset += height * width * 4; 1689 workers.WaitForRequests();
1676 } 1690 }
1677} 1691}
1678 1692