diff options
| author | 2021-06-16 11:29:10 -0700 | |
|---|---|---|
| committer | 2021-06-16 11:29:10 -0700 | |
| commit | 973bf306edda84e730d56dd73a04c7bbd20d9397 (patch) | |
| tree | 4d64ac6f6de8e7a2eeeeeba9bd850b7924e46a6b | |
| parent | Merge pull request #6460 from Morph1984/fs-access-log-fix (diff) | |
| parent | astc_decoder: Fix LDR CEM1 endpoint calculation (diff) | |
| download | yuzu-973bf306edda84e730d56dd73a04c7bbd20d9397.tar.gz yuzu-973bf306edda84e730d56dd73a04c7bbd20d9397.tar.xz yuzu-973bf306edda84e730d56dd73a04c7bbd20d9397.zip | |
Merge pull request #6464 from ameerj/disable-astc
textures: Add a toggle for GPU Accelerated ASTC decoder
Diffstat (limited to '')
| -rw-r--r-- | src/common/settings.cpp | 2 | ||||
| -rw-r--r-- | src/common/settings.h | 1 | ||||
| -rw-r--r-- | src/core/telemetry_session.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/texture_cache/util.cpp | 13 | ||||
| -rw-r--r-- | src/video_core/textures/astc.cpp | 1577 | ||||
| -rw-r--r-- | src/video_core/textures/astc.h | 3 | ||||
| -rw-r--r-- | src/yuzu/configuration/config.cpp | 2 | ||||
| -rw-r--r-- | src/yuzu/configuration/configure_graphics.cpp | 7 | ||||
| -rw-r--r-- | src/yuzu/configuration/configure_graphics.h | 1 | ||||
| -rw-r--r-- | src/yuzu/configuration/configure_graphics.ui | 7 | ||||
| -rw-r--r-- | src/yuzu_cmd/config.cpp | 6 | ||||
| -rw-r--r-- | src/yuzu_cmd/default_ini.h | 8 |
16 files changed, 1637 insertions, 7 deletions
diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 360e878d6..9ec71eced 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp | |||
| @@ -55,6 +55,7 @@ void LogSettings() { | |||
| 55 | log_setting("Renderer_UseAsynchronousGpuEmulation", | 55 | log_setting("Renderer_UseAsynchronousGpuEmulation", |
| 56 | values.use_asynchronous_gpu_emulation.GetValue()); | 56 | values.use_asynchronous_gpu_emulation.GetValue()); |
| 57 | log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue()); | 57 | log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue()); |
| 58 | log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue()); | ||
| 58 | log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); | 59 | log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); |
| 59 | log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue()); | 60 | log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue()); |
| 60 | log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); | 61 | log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); |
| @@ -135,6 +136,7 @@ void RestoreGlobalState(bool is_powered_on) { | |||
| 135 | values.gpu_accuracy.SetGlobal(true); | 136 | values.gpu_accuracy.SetGlobal(true); |
| 136 | values.use_asynchronous_gpu_emulation.SetGlobal(true); | 137 | values.use_asynchronous_gpu_emulation.SetGlobal(true); |
| 137 | values.use_nvdec_emulation.SetGlobal(true); | 138 | values.use_nvdec_emulation.SetGlobal(true); |
| 139 | values.accelerate_astc.SetGlobal(true); | ||
| 138 | values.use_vsync.SetGlobal(true); | 140 | values.use_vsync.SetGlobal(true); |
| 139 | values.use_assembly_shaders.SetGlobal(true); | 141 | values.use_assembly_shaders.SetGlobal(true); |
| 140 | values.use_asynchronous_shaders.SetGlobal(true); | 142 | values.use_asynchronous_shaders.SetGlobal(true); |
diff --git a/src/common/settings.h b/src/common/settings.h index bf34f2b5b..6198f2d9f 100644 --- a/src/common/settings.h +++ b/src/common/settings.h | |||
| @@ -147,6 +147,7 @@ struct Values { | |||
| 147 | Setting<GPUAccuracy> gpu_accuracy; | 147 | Setting<GPUAccuracy> gpu_accuracy; |
| 148 | Setting<bool> use_asynchronous_gpu_emulation; | 148 | Setting<bool> use_asynchronous_gpu_emulation; |
| 149 | Setting<bool> use_nvdec_emulation; | 149 | Setting<bool> use_nvdec_emulation; |
| 150 | Setting<bool> accelerate_astc; | ||
| 150 | Setting<bool> use_vsync; | 151 | Setting<bool> use_vsync; |
| 151 | Setting<bool> use_assembly_shaders; | 152 | Setting<bool> use_assembly_shaders; |
| 152 | Setting<bool> use_asynchronous_shaders; | 153 | Setting<bool> use_asynchronous_shaders; |
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index ad1a9ffb4..d4c23ced2 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp | |||
| @@ -230,6 +230,7 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader, | |||
| 230 | Settings::values.use_asynchronous_gpu_emulation.GetValue()); | 230 | Settings::values.use_asynchronous_gpu_emulation.GetValue()); |
| 231 | AddField(field_type, "Renderer_UseNvdecEmulation", | 231 | AddField(field_type, "Renderer_UseNvdecEmulation", |
| 232 | Settings::values.use_nvdec_emulation.GetValue()); | 232 | Settings::values.use_nvdec_emulation.GetValue()); |
| 233 | AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue()); | ||
| 233 | AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); | 234 | AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); |
| 234 | AddField(field_type, "Renderer_UseAssemblyShaders", | 235 | AddField(field_type, "Renderer_UseAssemblyShaders", |
| 235 | Settings::values.use_assembly_shaders.GetValue()); | 236 | Settings::values.use_assembly_shaders.GetValue()); |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 47190c464..f9454bbaa 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -237,6 +237,7 @@ add_library(video_core STATIC | |||
| 237 | texture_cache/util.cpp | 237 | texture_cache/util.cpp |
| 238 | texture_cache/util.h | 238 | texture_cache/util.h |
| 239 | textures/astc.h | 239 | textures/astc.h |
| 240 | textures/astc.cpp | ||
| 240 | textures/decoders.cpp | 241 | textures/decoders.cpp |
| 241 | textures/decoders.h | 242 | textures/decoders.h |
| 242 | textures/texture.cpp | 243 | textures/texture.cpp |
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 703e34587..eaba1b103 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp | |||
| @@ -763,7 +763,7 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode) { | |||
| 763 | case 1: { | 763 | case 1: { |
| 764 | READ_UINT_VALUES(2) | 764 | READ_UINT_VALUES(2) |
| 765 | uint L0 = (v[0] >> 2) | (v[1] & 0xC0); | 765 | uint L0 = (v[0] >> 2) | (v[1] & 0xC0); |
| 766 | uint L1 = max(L0 + (v[1] & 0x3F), 0xFFU); | 766 | uint L1 = min(L0 + (v[1] & 0x3F), 0xFFU); |
| 767 | ep1 = uvec4(0xFF, L0, L0, L0); | 767 | ep1 = uvec4(0xFF, L0, L0, L0); |
| 768 | ep2 = uvec4(0xFF, L1, L1, L1); | 768 | ep2 = uvec4(0xFF, L1, L1, L1); |
| 769 | break; | 769 | break; |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index ffe9edc1b..9b4038615 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -9,6 +9,8 @@ | |||
| 9 | 9 | ||
| 10 | #include <glad/glad.h> | 10 | #include <glad/glad.h> |
| 11 | 11 | ||
| 12 | #include "common/settings.h" | ||
| 13 | |||
| 12 | #include "video_core/renderer_opengl/gl_device.h" | 14 | #include "video_core/renderer_opengl/gl_device.h" |
| 13 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 15 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 14 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 16 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| @@ -307,7 +309,9 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 | |||
| 307 | 309 | ||
| 308 | [[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime, | 310 | [[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime, |
| 309 | const VideoCommon::ImageInfo& info) { | 311 | const VideoCommon::ImageInfo& info) { |
| 310 | return !runtime.HasNativeASTC() && IsPixelFormatASTC(info.format); | 312 | if (IsPixelFormatASTC(info.format)) { |
| 313 | return !runtime.HasNativeASTC() && Settings::values.accelerate_astc.GetValue(); | ||
| 314 | } | ||
| 311 | // Disable other accelerated uploads for now as they don't implement swizzled uploads | 315 | // Disable other accelerated uploads for now as they don't implement swizzled uploads |
| 312 | return false; | 316 | return false; |
| 313 | switch (info.type) { | 317 | switch (info.type) { |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index bdd0ce8bc..52860b4cf 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | 9 | ||
| 10 | #include "common/bit_cast.h" | 10 | #include "common/bit_cast.h" |
| 11 | #include "common/settings.h" | ||
| 11 | 12 | ||
| 12 | #include "video_core/engines/fermi_2d.h" | 13 | #include "video_core/engines/fermi_2d.h" |
| 13 | #include "video_core/renderer_vulkan/blit_image.h" | 14 | #include "video_core/renderer_vulkan/blit_image.h" |
| @@ -828,7 +829,11 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ | |||
| 828 | commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); | 829 | commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); |
| 829 | } | 830 | } |
| 830 | if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { | 831 | if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { |
| 831 | flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; | 832 | if (Settings::values.accelerate_astc.GetValue()) { |
| 833 | flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; | ||
| 834 | } else { | ||
| 835 | flags |= VideoCommon::ImageFlagBits::Converted; | ||
| 836 | } | ||
| 832 | } | 837 | } |
| 833 | if (runtime.device.HasDebuggingToolAttached()) { | 838 | if (runtime.device.HasDebuggingToolAttached()) { |
| 834 | if (image) { | 839 | if (image) { |
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 906604a39..0d3e0804f 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp | |||
| @@ -47,6 +47,7 @@ | |||
| 47 | #include "video_core/texture_cache/formatter.h" | 47 | #include "video_core/texture_cache/formatter.h" |
| 48 | #include "video_core/texture_cache/samples_helper.h" | 48 | #include "video_core/texture_cache/samples_helper.h" |
| 49 | #include "video_core/texture_cache/util.h" | 49 | #include "video_core/texture_cache/util.h" |
| 50 | #include "video_core/textures/astc.h" | ||
| 50 | #include "video_core/textures/decoders.h" | 51 | #include "video_core/textures/decoders.h" |
| 51 | 52 | ||
| 52 | namespace VideoCommon { | 53 | namespace VideoCommon { |
| @@ -884,8 +885,16 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 | |||
| 884 | ASSERT(copy.image_extent == mip_size); | 885 | ASSERT(copy.image_extent == mip_size); |
| 885 | ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width)); | 886 | ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width)); |
| 886 | ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height)); | 887 | ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height)); |
| 887 | DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent, | 888 | if (IsPixelFormatASTC(info.format)) { |
| 888 | output.subspan(output_offset)); | 889 | ASSERT(copy.image_extent.depth == 1); |
| 890 | Tegra::Texture::ASTC::Decompress(input.subspan(copy.buffer_offset), | ||
| 891 | copy.image_extent.width, copy.image_extent.height, | ||
| 892 | copy.image_subresource.num_layers, tile_size.width, | ||
| 893 | tile_size.height, output.subspan(output_offset)); | ||
| 894 | } else { | ||
| 895 | DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent, | ||
| 896 | output.subspan(output_offset)); | ||
| 897 | } | ||
| 889 | copy.buffer_offset = output_offset; | 898 | copy.buffer_offset = output_offset; |
| 890 | copy.buffer_row_length = mip_size.width; | 899 | copy.buffer_row_length = mip_size.width; |
| 891 | copy.buffer_image_height = mip_size.height; | 900 | copy.buffer_image_height = mip_size.height; |
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp new file mode 100644 index 000000000..9b2177ebd --- /dev/null +++ b/src/video_core/textures/astc.cpp | |||
| @@ -0,0 +1,1577 @@ | |||
| 1 | // Copyright 2016 The University of North Carolina at Chapel Hill | ||
| 2 | // | ||
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 4 | // you may not use this file except in compliance with the License. | ||
| 5 | // You may obtain a copy of the License at | ||
| 6 | // | ||
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 | ||
| 8 | // | ||
| 9 | // Unless required by applicable law or agreed to in writing, software | ||
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, | ||
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 12 | // See the License for the specific language governing permissions and | ||
| 13 | // limitations under the License. | ||
| 14 | // | ||
| 15 | // Please send all BUG REPORTS to <pavel@cs.unc.edu>. | ||
| 16 | // <http://gamma.cs.unc.edu/FasTC/> | ||
| 17 | |||
| 18 | #include <algorithm> | ||
| 19 | #include <cassert> | ||
| 20 | #include <cstring> | ||
| 21 | #include <span> | ||
| 22 | #include <vector> | ||
| 23 | |||
| 24 | #include <boost/container/static_vector.hpp> | ||
| 25 | |||
| 26 | #include "common/common_types.h" | ||
| 27 | #include "video_core/textures/astc.h" | ||
| 28 | |||
| 29 | class InputBitStream { | ||
| 30 | public: | ||
| 31 | constexpr explicit InputBitStream(std::span<const u8> data, size_t start_offset = 0) | ||
| 32 | : cur_byte{data.data()}, total_bits{data.size()}, next_bit{start_offset % 8} {} | ||
| 33 | |||
| 34 | constexpr size_t GetBitsRead() const { | ||
| 35 | return bits_read; | ||
| 36 | } | ||
| 37 | |||
| 38 | constexpr bool ReadBit() { | ||
| 39 | if (bits_read >= total_bits * 8) { | ||
| 40 | return 0; | ||
| 41 | } | ||
| 42 | const bool bit = ((*cur_byte >> next_bit) & 1) != 0; | ||
| 43 | ++next_bit; | ||
| 44 | while (next_bit >= 8) { | ||
| 45 | next_bit -= 8; | ||
| 46 | ++cur_byte; | ||
| 47 | } | ||
| 48 | ++bits_read; | ||
| 49 | return bit; | ||
| 50 | } | ||
| 51 | |||
| 52 | constexpr u32 ReadBits(std::size_t nBits) { | ||
| 53 | u32 ret = 0; | ||
| 54 | for (std::size_t i = 0; i < nBits; ++i) { | ||
| 55 | ret |= (ReadBit() & 1) << i; | ||
| 56 | } | ||
| 57 | return ret; | ||
| 58 | } | ||
| 59 | |||
| 60 | template <std::size_t nBits> | ||
| 61 | constexpr u32 ReadBits() { | ||
| 62 | u32 ret = 0; | ||
| 63 | for (std::size_t i = 0; i < nBits; ++i) { | ||
| 64 | ret |= (ReadBit() & 1) << i; | ||
| 65 | } | ||
| 66 | return ret; | ||
| 67 | } | ||
| 68 | |||
| 69 | private: | ||
| 70 | const u8* cur_byte; | ||
| 71 | size_t total_bits = 0; | ||
| 72 | size_t next_bit = 0; | ||
| 73 | size_t bits_read = 0; | ||
| 74 | }; | ||
| 75 | |||
| 76 | class OutputBitStream { | ||
| 77 | public: | ||
| 78 | constexpr explicit OutputBitStream(u8* ptr, std::size_t bits = 0, std::size_t start_offset = 0) | ||
| 79 | : cur_byte{ptr}, num_bits{bits}, next_bit{start_offset % 8} {} | ||
| 80 | |||
| 81 | constexpr std::size_t GetBitsWritten() const { | ||
| 82 | return bits_written; | ||
| 83 | } | ||
| 84 | |||
| 85 | constexpr void WriteBitsR(u32 val, u32 nBits) { | ||
| 86 | for (u32 i = 0; i < nBits; i++) { | ||
| 87 | WriteBit((val >> (nBits - i - 1)) & 1); | ||
| 88 | } | ||
| 89 | } | ||
| 90 | |||
| 91 | constexpr void WriteBits(u32 val, u32 nBits) { | ||
| 92 | for (u32 i = 0; i < nBits; i++) { | ||
| 93 | WriteBit((val >> i) & 1); | ||
| 94 | } | ||
| 95 | } | ||
| 96 | |||
| 97 | private: | ||
| 98 | constexpr void WriteBit(bool b) { | ||
| 99 | if (bits_written >= num_bits) { | ||
| 100 | return; | ||
| 101 | } | ||
| 102 | |||
| 103 | const u32 mask = 1 << next_bit++; | ||
| 104 | |||
| 105 | // clear the bit | ||
| 106 | *cur_byte &= static_cast<u8>(~mask); | ||
| 107 | |||
| 108 | // Write the bit, if necessary | ||
| 109 | if (b) | ||
| 110 | *cur_byte |= static_cast<u8>(mask); | ||
| 111 | |||
| 112 | // Next byte? | ||
| 113 | if (next_bit >= 8) { | ||
| 114 | cur_byte += 1; | ||
| 115 | next_bit = 0; | ||
| 116 | } | ||
| 117 | } | ||
| 118 | |||
| 119 | u8* cur_byte; | ||
| 120 | std::size_t num_bits; | ||
| 121 | std::size_t bits_written = 0; | ||
| 122 | std::size_t next_bit = 0; | ||
| 123 | }; | ||
| 124 | |||
| 125 | template <typename IntType> | ||
| 126 | class Bits { | ||
| 127 | public: | ||
| 128 | explicit Bits(const IntType& v) : m_Bits(v) {} | ||
| 129 | |||
| 130 | Bits(const Bits&) = delete; | ||
| 131 | Bits& operator=(const Bits&) = delete; | ||
| 132 | |||
| 133 | u8 operator[](u32 bitPos) const { | ||
| 134 | return static_cast<u8>((m_Bits >> bitPos) & 1); | ||
| 135 | } | ||
| 136 | |||
| 137 | IntType operator()(u32 start, u32 end) const { | ||
| 138 | if (start == end) { | ||
| 139 | return (*this)[start]; | ||
| 140 | } else if (start > end) { | ||
| 141 | u32 t = start; | ||
| 142 | start = end; | ||
| 143 | end = t; | ||
| 144 | } | ||
| 145 | |||
| 146 | u64 mask = (1 << (end - start + 1)) - 1; | ||
| 147 | return (m_Bits >> start) & static_cast<IntType>(mask); | ||
| 148 | } | ||
| 149 | |||
| 150 | private: | ||
| 151 | const IntType& m_Bits; | ||
| 152 | }; | ||
| 153 | |||
| 154 | namespace Tegra::Texture::ASTC { | ||
/// Fixed-capacity (256 elements) in-place vector of IntegerEncodedValue, used as the output
/// container of the integer sequence decoders below.
/// NOTE(review): throw_on_overflow<false> presumably means exceeding the capacity is not
/// reported via an exception — confirm against the Boost.Container documentation.
using IntegerEncodedVector = boost::container::static_vector<
    IntegerEncodedValue, 256,
    boost::container::static_vector_options<
        boost::container::inplace_alignment<alignof(IntegerEncodedValue)>,
        boost::container::throw_on_overflow<false>>::type>;
| 160 | |||
| 161 | static void DecodeTritBlock(InputBitStream& bits, IntegerEncodedVector& result, u32 nBitsPerValue) { | ||
| 162 | // Implement the algorithm in section C.2.12 | ||
| 163 | std::array<u32, 5> m; | ||
| 164 | std::array<u32, 5> t; | ||
| 165 | u32 T; | ||
| 166 | |||
| 167 | // Read the trit encoded block according to | ||
| 168 | // table C.2.14 | ||
| 169 | m[0] = bits.ReadBits(nBitsPerValue); | ||
| 170 | T = bits.ReadBits<2>(); | ||
| 171 | m[1] = bits.ReadBits(nBitsPerValue); | ||
| 172 | T |= bits.ReadBits<2>() << 2; | ||
| 173 | m[2] = bits.ReadBits(nBitsPerValue); | ||
| 174 | T |= bits.ReadBit() << 4; | ||
| 175 | m[3] = bits.ReadBits(nBitsPerValue); | ||
| 176 | T |= bits.ReadBits<2>() << 5; | ||
| 177 | m[4] = bits.ReadBits(nBitsPerValue); | ||
| 178 | T |= bits.ReadBit() << 7; | ||
| 179 | |||
| 180 | u32 C = 0; | ||
| 181 | |||
| 182 | Bits<u32> Tb(T); | ||
| 183 | if (Tb(2, 4) == 7) { | ||
| 184 | C = (Tb(5, 7) << 2) | Tb(0, 1); | ||
| 185 | t[4] = t[3] = 2; | ||
| 186 | } else { | ||
| 187 | C = Tb(0, 4); | ||
| 188 | if (Tb(5, 6) == 3) { | ||
| 189 | t[4] = 2; | ||
| 190 | t[3] = Tb[7]; | ||
| 191 | } else { | ||
| 192 | t[4] = Tb[7]; | ||
| 193 | t[3] = Tb(5, 6); | ||
| 194 | } | ||
| 195 | } | ||
| 196 | |||
| 197 | Bits<u32> Cb(C); | ||
| 198 | if (Cb(0, 1) == 3) { | ||
| 199 | t[2] = 2; | ||
| 200 | t[1] = Cb[4]; | ||
| 201 | t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]); | ||
| 202 | } else if (Cb(2, 3) == 3) { | ||
| 203 | t[2] = 2; | ||
| 204 | t[1] = 2; | ||
| 205 | t[0] = Cb(0, 1); | ||
| 206 | } else { | ||
| 207 | t[2] = Cb[4]; | ||
| 208 | t[1] = Cb(2, 3); | ||
| 209 | t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]); | ||
| 210 | } | ||
| 211 | |||
| 212 | for (std::size_t i = 0; i < 5; ++i) { | ||
| 213 | IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Trit, nBitsPerValue); | ||
| 214 | val.bit_value = m[i]; | ||
| 215 | val.trit_value = t[i]; | ||
| 216 | } | ||
| 217 | } | ||
| 218 | |||
| 219 | static void DecodeQuintBlock(InputBitStream& bits, IntegerEncodedVector& result, | ||
| 220 | u32 nBitsPerValue) { | ||
| 221 | // Implement the algorithm in section C.2.12 | ||
| 222 | u32 m[3]; | ||
| 223 | u32 q[3]; | ||
| 224 | u32 Q; | ||
| 225 | |||
| 226 | // Read the trit encoded block according to | ||
| 227 | // table C.2.15 | ||
| 228 | m[0] = bits.ReadBits(nBitsPerValue); | ||
| 229 | Q = bits.ReadBits<3>(); | ||
| 230 | m[1] = bits.ReadBits(nBitsPerValue); | ||
| 231 | Q |= bits.ReadBits<2>() << 3; | ||
| 232 | m[2] = bits.ReadBits(nBitsPerValue); | ||
| 233 | Q |= bits.ReadBits<2>() << 5; | ||
| 234 | |||
| 235 | Bits<u32> Qb(Q); | ||
| 236 | if (Qb(1, 2) == 3 && Qb(5, 6) == 0) { | ||
| 237 | q[0] = q[1] = 4; | ||
| 238 | q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]); | ||
| 239 | } else { | ||
| 240 | u32 C = 0; | ||
| 241 | if (Qb(1, 2) == 3) { | ||
| 242 | q[2] = 4; | ||
| 243 | C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0]; | ||
| 244 | } else { | ||
| 245 | q[2] = Qb(5, 6); | ||
| 246 | C = Qb(0, 4); | ||
| 247 | } | ||
| 248 | |||
| 249 | Bits<u32> Cb(C); | ||
| 250 | if (Cb(0, 2) == 5) { | ||
| 251 | q[1] = 4; | ||
| 252 | q[0] = Cb(3, 4); | ||
| 253 | } else { | ||
| 254 | q[1] = Cb(3, 4); | ||
| 255 | q[0] = Cb(0, 2); | ||
| 256 | } | ||
| 257 | } | ||
| 258 | |||
| 259 | for (std::size_t i = 0; i < 3; ++i) { | ||
| 260 | IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Quint, nBitsPerValue); | ||
| 261 | val.bit_value = m[i]; | ||
| 262 | val.quint_value = q[i]; | ||
| 263 | } | ||
| 264 | } | ||
| 265 | |||
| 266 | // Fills result with the values that are encoded in the given | ||
| 267 | // bitstream. We must know beforehand what the maximum possible | ||
| 268 | // value is, and how many values we're decoding. | ||
| 269 | static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange, | ||
| 270 | u32 nValues) { | ||
| 271 | // Determine encoding parameters | ||
| 272 | IntegerEncodedValue val = EncodingsValues[maxRange]; | ||
| 273 | |||
| 274 | // Start decoding | ||
| 275 | u32 nValsDecoded = 0; | ||
| 276 | while (nValsDecoded < nValues) { | ||
| 277 | switch (val.encoding) { | ||
| 278 | case IntegerEncoding::Quint: | ||
| 279 | DecodeQuintBlock(bits, result, val.num_bits); | ||
| 280 | nValsDecoded += 3; | ||
| 281 | break; | ||
| 282 | |||
| 283 | case IntegerEncoding::Trit: | ||
| 284 | DecodeTritBlock(bits, result, val.num_bits); | ||
| 285 | nValsDecoded += 5; | ||
| 286 | break; | ||
| 287 | |||
| 288 | case IntegerEncoding::JustBits: | ||
| 289 | val.bit_value = bits.ReadBits(val.num_bits); | ||
| 290 | result.push_back(val); | ||
| 291 | nValsDecoded++; | ||
| 292 | break; | ||
| 293 | } | ||
| 294 | } | ||
| 295 | } | ||
| 296 | |||
| 297 | struct TexelWeightParams { | ||
| 298 | u32 m_Width = 0; | ||
| 299 | u32 m_Height = 0; | ||
| 300 | bool m_bDualPlane = false; | ||
| 301 | u32 m_MaxWeight = 0; | ||
| 302 | bool m_bError = false; | ||
| 303 | bool m_bVoidExtentLDR = false; | ||
| 304 | bool m_bVoidExtentHDR = false; | ||
| 305 | |||
| 306 | u32 GetPackedBitSize() const { | ||
| 307 | // How many indices do we have? | ||
| 308 | u32 nIdxs = m_Height * m_Width; | ||
| 309 | if (m_bDualPlane) { | ||
| 310 | nIdxs *= 2; | ||
| 311 | } | ||
| 312 | |||
| 313 | return EncodingsValues[m_MaxWeight].GetBitLength(nIdxs); | ||
| 314 | } | ||
| 315 | |||
| 316 | u32 GetNumWeightValues() const { | ||
| 317 | u32 ret = m_Width * m_Height; | ||
| 318 | if (m_bDualPlane) { | ||
| 319 | ret *= 2; | ||
| 320 | } | ||
| 321 | return ret; | ||
| 322 | } | ||
| 323 | }; | ||
| 324 | |||
| 325 | static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | ||
| 326 | TexelWeightParams params; | ||
| 327 | |||
| 328 | // Read the entire block mode all at once | ||
| 329 | u16 modeBits = static_cast<u16>(strm.ReadBits<11>()); | ||
| 330 | |||
| 331 | // Does this match the void extent block mode? | ||
| 332 | if ((modeBits & 0x01FF) == 0x1FC) { | ||
| 333 | if (modeBits & 0x200) { | ||
| 334 | params.m_bVoidExtentHDR = true; | ||
| 335 | } else { | ||
| 336 | params.m_bVoidExtentLDR = true; | ||
| 337 | } | ||
| 338 | |||
| 339 | // Next two bits must be one. | ||
| 340 | if (!(modeBits & 0x400) || !strm.ReadBit()) { | ||
| 341 | params.m_bError = true; | ||
| 342 | } | ||
| 343 | |||
| 344 | return params; | ||
| 345 | } | ||
| 346 | |||
| 347 | // First check if the last four bits are zero | ||
| 348 | if ((modeBits & 0xF) == 0) { | ||
| 349 | params.m_bError = true; | ||
| 350 | return params; | ||
| 351 | } | ||
| 352 | |||
| 353 | // If the last two bits are zero, then if bits | ||
| 354 | // [6-8] are all ones, this is also reserved. | ||
| 355 | if ((modeBits & 0x3) == 0 && (modeBits & 0x1C0) == 0x1C0) { | ||
| 356 | params.m_bError = true; | ||
| 357 | return params; | ||
| 358 | } | ||
| 359 | |||
| 360 | // Otherwise, there is no error... Figure out the layout | ||
| 361 | // of the block mode. Layout is determined by a number | ||
| 362 | // between 0 and 9 corresponding to table C.2.8 of the | ||
| 363 | // ASTC spec. | ||
| 364 | u32 layout = 0; | ||
| 365 | |||
| 366 | if ((modeBits & 0x1) || (modeBits & 0x2)) { | ||
| 367 | // layout is in [0-4] | ||
| 368 | if (modeBits & 0x8) { | ||
| 369 | // layout is in [2-4] | ||
| 370 | if (modeBits & 0x4) { | ||
| 371 | // layout is in [3-4] | ||
| 372 | if (modeBits & 0x100) { | ||
| 373 | layout = 4; | ||
| 374 | } else { | ||
| 375 | layout = 3; | ||
| 376 | } | ||
| 377 | } else { | ||
| 378 | layout = 2; | ||
| 379 | } | ||
| 380 | } else { | ||
| 381 | // layout is in [0-1] | ||
| 382 | if (modeBits & 0x4) { | ||
| 383 | layout = 1; | ||
| 384 | } else { | ||
| 385 | layout = 0; | ||
| 386 | } | ||
| 387 | } | ||
| 388 | } else { | ||
| 389 | // layout is in [5-9] | ||
| 390 | if (modeBits & 0x100) { | ||
| 391 | // layout is in [7-9] | ||
| 392 | if (modeBits & 0x80) { | ||
| 393 | // layout is in [7-8] | ||
| 394 | assert((modeBits & 0x40) == 0U); | ||
| 395 | if (modeBits & 0x20) { | ||
| 396 | layout = 8; | ||
| 397 | } else { | ||
| 398 | layout = 7; | ||
| 399 | } | ||
| 400 | } else { | ||
| 401 | layout = 9; | ||
| 402 | } | ||
| 403 | } else { | ||
| 404 | // layout is in [5-6] | ||
| 405 | if (modeBits & 0x80) { | ||
| 406 | layout = 6; | ||
| 407 | } else { | ||
| 408 | layout = 5; | ||
| 409 | } | ||
| 410 | } | ||
| 411 | } | ||
| 412 | |||
| 413 | assert(layout < 10); | ||
| 414 | |||
| 415 | // Determine R | ||
| 416 | u32 R = !!(modeBits & 0x10); | ||
| 417 | if (layout < 5) { | ||
| 418 | R |= (modeBits & 0x3) << 1; | ||
| 419 | } else { | ||
| 420 | R |= (modeBits & 0xC) >> 1; | ||
| 421 | } | ||
| 422 | assert(2 <= R && R <= 7); | ||
| 423 | |||
| 424 | // Determine width & height | ||
| 425 | switch (layout) { | ||
| 426 | case 0: { | ||
| 427 | u32 A = (modeBits >> 5) & 0x3; | ||
| 428 | u32 B = (modeBits >> 7) & 0x3; | ||
| 429 | params.m_Width = B + 4; | ||
| 430 | params.m_Height = A + 2; | ||
| 431 | break; | ||
| 432 | } | ||
| 433 | |||
| 434 | case 1: { | ||
| 435 | u32 A = (modeBits >> 5) & 0x3; | ||
| 436 | u32 B = (modeBits >> 7) & 0x3; | ||
| 437 | params.m_Width = B + 8; | ||
| 438 | params.m_Height = A + 2; | ||
| 439 | break; | ||
| 440 | } | ||
| 441 | |||
| 442 | case 2: { | ||
| 443 | u32 A = (modeBits >> 5) & 0x3; | ||
| 444 | u32 B = (modeBits >> 7) & 0x3; | ||
| 445 | params.m_Width = A + 2; | ||
| 446 | params.m_Height = B + 8; | ||
| 447 | break; | ||
| 448 | } | ||
| 449 | |||
| 450 | case 3: { | ||
| 451 | u32 A = (modeBits >> 5) & 0x3; | ||
| 452 | u32 B = (modeBits >> 7) & 0x1; | ||
| 453 | params.m_Width = A + 2; | ||
| 454 | params.m_Height = B + 6; | ||
| 455 | break; | ||
| 456 | } | ||
| 457 | |||
| 458 | case 4: { | ||
| 459 | u32 A = (modeBits >> 5) & 0x3; | ||
| 460 | u32 B = (modeBits >> 7) & 0x1; | ||
| 461 | params.m_Width = B + 2; | ||
| 462 | params.m_Height = A + 2; | ||
| 463 | break; | ||
| 464 | } | ||
| 465 | |||
| 466 | case 5: { | ||
| 467 | u32 A = (modeBits >> 5) & 0x3; | ||
| 468 | params.m_Width = 12; | ||
| 469 | params.m_Height = A + 2; | ||
| 470 | break; | ||
| 471 | } | ||
| 472 | |||
| 473 | case 6: { | ||
| 474 | u32 A = (modeBits >> 5) & 0x3; | ||
| 475 | params.m_Width = A + 2; | ||
| 476 | params.m_Height = 12; | ||
| 477 | break; | ||
| 478 | } | ||
| 479 | |||
| 480 | case 7: { | ||
| 481 | params.m_Width = 6; | ||
| 482 | params.m_Height = 10; | ||
| 483 | break; | ||
| 484 | } | ||
| 485 | |||
| 486 | case 8: { | ||
| 487 | params.m_Width = 10; | ||
| 488 | params.m_Height = 6; | ||
| 489 | break; | ||
| 490 | } | ||
| 491 | |||
| 492 | case 9: { | ||
| 493 | u32 A = (modeBits >> 5) & 0x3; | ||
| 494 | u32 B = (modeBits >> 9) & 0x3; | ||
| 495 | params.m_Width = A + 6; | ||
| 496 | params.m_Height = B + 6; | ||
| 497 | break; | ||
| 498 | } | ||
| 499 | |||
| 500 | default: | ||
| 501 | assert(false && "Don't know this layout..."); | ||
| 502 | params.m_bError = true; | ||
| 503 | break; | ||
| 504 | } | ||
| 505 | |||
| 506 | // Determine whether or not we're using dual planes | ||
| 507 | // and/or high precision layouts. | ||
| 508 | bool D = (layout != 9) && (modeBits & 0x400); | ||
| 509 | bool H = (layout != 9) && (modeBits & 0x200); | ||
| 510 | |||
| 511 | if (H) { | ||
| 512 | const u32 maxWeights[6] = {9, 11, 15, 19, 23, 31}; | ||
| 513 | params.m_MaxWeight = maxWeights[R - 2]; | ||
| 514 | } else { | ||
| 515 | const u32 maxWeights[6] = {1, 2, 3, 4, 5, 7}; | ||
| 516 | params.m_MaxWeight = maxWeights[R - 2]; | ||
| 517 | } | ||
| 518 | |||
| 519 | params.m_bDualPlane = D; | ||
| 520 | |||
| 521 | return params; | ||
| 522 | } | ||
| 523 | |||
| 524 | static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth, | ||
| 525 | u32 blockHeight) { | ||
| 526 | // Don't actually care about the void extent, just read the bits... | ||
| 527 | for (s32 i = 0; i < 4; ++i) { | ||
| 528 | strm.ReadBits<13>(); | ||
| 529 | } | ||
| 530 | |||
| 531 | // Decode the RGBA components and renormalize them to the range [0, 255] | ||
| 532 | u16 r = static_cast<u16>(strm.ReadBits<16>()); | ||
| 533 | u16 g = static_cast<u16>(strm.ReadBits<16>()); | ||
| 534 | u16 b = static_cast<u16>(strm.ReadBits<16>()); | ||
| 535 | u16 a = static_cast<u16>(strm.ReadBits<16>()); | ||
| 536 | |||
| 537 | u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 | | ||
| 538 | (static_cast<u32>(a) & 0xFF00) << 16; | ||
| 539 | |||
| 540 | for (u32 j = 0; j < blockHeight; j++) { | ||
| 541 | for (u32 i = 0; i < blockWidth; i++) { | ||
| 542 | outBuf[j * blockWidth + i] = rgba; | ||
| 543 | } | ||
| 544 | } | ||
| 545 | } | ||
| 546 | |||
| 547 | static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) { | ||
| 548 | for (u32 j = 0; j < blockHeight; j++) { | ||
| 549 | for (u32 i = 0; i < blockWidth; i++) { | ||
| 550 | outBuf[j * blockWidth + i] = 0xFFFF00FF; | ||
| 551 | } | ||
| 552 | } | ||
| 553 | } | ||
/// Returns entry `value` of REPLICATE_BYTE_TO_16_TABLE (defined outside this part of the file).
/// No bounds check is performed on `value`.
/// NOTE(review): presumably expands an 8-bit value to 16 bits by bit replication — confirm
/// against the table's definition.
static constexpr u32 ReplicateByteTo16(std::size_t value) {
    return REPLICATE_BYTE_TO_16_TABLE[value];
}
| 557 | |||
// Lookup table built at compile time by MakeReplicateTable<u32, 1, 7>().
// NOTE(review): presumably maps a 1-bit value to its 7-bit replication — confirm against
// MakeReplicateTable, which is defined outside this part of the file.
static constexpr auto REPLICATE_BIT_TO_7_TABLE = MakeReplicateTable<u32, 1, 7>();
/// Returns entry `value` of REPLICATE_BIT_TO_7_TABLE. No bounds check is performed.
static constexpr u32 ReplicateBitTo7(std::size_t value) {
    return REPLICATE_BIT_TO_7_TABLE[value];
}
| 562 | |||
// Lookup table built at compile time by MakeReplicateTable<u32, 1, 9>().
// NOTE(review): presumably maps a 1-bit value to its 9-bit replication — confirm against
// MakeReplicateTable, which is defined outside this part of the file.
static constexpr auto REPLICATE_BIT_TO_9_TABLE = MakeReplicateTable<u32, 1, 9>();
/// Returns entry `value` of REPLICATE_BIT_TO_9_TABLE. No bounds check is performed.
static constexpr u32 ReplicateBitTo9(std::size_t value) {
    return REPLICATE_BIT_TO_9_TABLE[value];
}
| 567 | |||
| 568 | static constexpr auto REPLICATE_1_BIT_TO_8_TABLE = MakeReplicateTable<u32, 1, 8>(); | ||
| 569 | static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>(); | ||
| 570 | static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>(); | ||
| 571 | static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>(); | ||
| 572 | static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>(); | ||
| 573 | /// Use a precompiled table with the most common usages, if it's not in the expected range, fallback | ||
| 574 | /// to the runtime implementation | ||
| 575 | static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) { | ||
| 576 | switch (num_bits) { | ||
| 577 | case 1: | ||
| 578 | return REPLICATE_1_BIT_TO_8_TABLE[value]; | ||
| 579 | case 2: | ||
| 580 | return REPLICATE_2_BIT_TO_8_TABLE[value]; | ||
| 581 | case 3: | ||
| 582 | return REPLICATE_3_BIT_TO_8_TABLE[value]; | ||
| 583 | case 4: | ||
| 584 | return REPLICATE_4_BIT_TO_8_TABLE[value]; | ||
| 585 | case 5: | ||
| 586 | return REPLICATE_5_BIT_TO_8_TABLE[value]; | ||
| 587 | case 6: | ||
| 588 | return REPLICATE_6_BIT_TO_8_TABLE[value]; | ||
| 589 | case 7: | ||
| 590 | return REPLICATE_7_BIT_TO_8_TABLE[value]; | ||
| 591 | case 8: | ||
| 592 | return REPLICATE_8_BIT_TO_8_TABLE[value]; | ||
| 593 | default: | ||
| 594 | return Replicate(value, num_bits, 8); | ||
| 595 | } | ||
| 596 | } | ||
| 597 | |||
| 598 | static constexpr auto REPLICATE_1_BIT_TO_6_TABLE = MakeReplicateTable<u32, 1, 6>(); | ||
| 599 | static constexpr auto REPLICATE_2_BIT_TO_6_TABLE = MakeReplicateTable<u32, 2, 6>(); | ||
| 600 | static constexpr auto REPLICATE_3_BIT_TO_6_TABLE = MakeReplicateTable<u32, 3, 6>(); | ||
| 601 | static constexpr auto REPLICATE_4_BIT_TO_6_TABLE = MakeReplicateTable<u32, 4, 6>(); | ||
| 602 | static constexpr auto REPLICATE_5_BIT_TO_6_TABLE = MakeReplicateTable<u32, 5, 6>(); | ||
| 603 | static constexpr u32 FastReplicateTo6(u32 value, u32 num_bits) { | ||
| 604 | switch (num_bits) { | ||
| 605 | case 1: | ||
| 606 | return REPLICATE_1_BIT_TO_6_TABLE[value]; | ||
| 607 | case 2: | ||
| 608 | return REPLICATE_2_BIT_TO_6_TABLE[value]; | ||
| 609 | case 3: | ||
| 610 | return REPLICATE_3_BIT_TO_6_TABLE[value]; | ||
| 611 | case 4: | ||
| 612 | return REPLICATE_4_BIT_TO_6_TABLE[value]; | ||
| 613 | case 5: | ||
| 614 | return REPLICATE_5_BIT_TO_6_TABLE[value]; | ||
| 615 | default: | ||
| 616 | return Replicate(value, num_bits, 6); | ||
| 617 | } | ||
| 618 | } | ||
| 619 | |||
| 620 | class Pixel { | ||
| 621 | protected: | ||
| 622 | using ChannelType = s16; | ||
| 623 | u8 m_BitDepth[4] = {8, 8, 8, 8}; | ||
| 624 | s16 color[4] = {}; | ||
| 625 | |||
| 626 | public: | ||
| 627 | Pixel() = default; | ||
| 628 | Pixel(u32 a, u32 r, u32 g, u32 b, u32 bitDepth = 8) | ||
| 629 | : m_BitDepth{u8(bitDepth), u8(bitDepth), u8(bitDepth), u8(bitDepth)}, | ||
| 630 | color{static_cast<ChannelType>(a), static_cast<ChannelType>(r), | ||
| 631 | static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {} | ||
| 632 | |||
| 633 | // Changes the depth of each pixel. This scales the values to | ||
| 634 | // the appropriate bit depth by either truncating the least | ||
| 635 | // significant bits when going from larger to smaller bit depth | ||
| 636 | // or by repeating the most significant bits when going from | ||
| 637 | // smaller to larger bit depths. | ||
| 638 | void ChangeBitDepth() { | ||
| 639 | for (u32 i = 0; i < 4; i++) { | ||
| 640 | Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i]); | ||
| 641 | m_BitDepth[i] = 8; | ||
| 642 | } | ||
| 643 | } | ||
| 644 | |||
| 645 | template <typename IntType> | ||
| 646 | static float ConvertChannelToFloat(IntType channel, u8 bitDepth) { | ||
| 647 | float denominator = static_cast<float>((1 << bitDepth) - 1); | ||
| 648 | return static_cast<float>(channel) / denominator; | ||
| 649 | } | ||
| 650 | |||
| 651 | // Changes the bit depth of a single component. See the comment | ||
| 652 | // above for how we do this. | ||
| 653 | static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth) { | ||
| 654 | assert(oldDepth <= 8); | ||
| 655 | |||
| 656 | if (oldDepth == 8) { | ||
| 657 | // Do nothing | ||
| 658 | return val; | ||
| 659 | } else if (oldDepth == 0) { | ||
| 660 | return static_cast<ChannelType>((1 << 8) - 1); | ||
| 661 | } else if (8 > oldDepth) { | ||
| 662 | return static_cast<ChannelType>(FastReplicateTo8(static_cast<u32>(val), oldDepth)); | ||
| 663 | } else { | ||
| 664 | // oldDepth > newDepth | ||
| 665 | const u8 bitsWasted = static_cast<u8>(oldDepth - 8); | ||
| 666 | u16 v = static_cast<u16>(val); | ||
| 667 | v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted); | ||
| 668 | v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << 8) - 1)); | ||
| 669 | return static_cast<u8>(v); | ||
| 670 | } | ||
| 671 | |||
| 672 | assert(false && "We shouldn't get here."); | ||
| 673 | return 0; | ||
| 674 | } | ||
| 675 | |||
| 676 | const ChannelType& A() const { | ||
| 677 | return color[0]; | ||
| 678 | } | ||
| 679 | ChannelType& A() { | ||
| 680 | return color[0]; | ||
| 681 | } | ||
| 682 | const ChannelType& R() const { | ||
| 683 | return color[1]; | ||
| 684 | } | ||
| 685 | ChannelType& R() { | ||
| 686 | return color[1]; | ||
| 687 | } | ||
| 688 | const ChannelType& G() const { | ||
| 689 | return color[2]; | ||
| 690 | } | ||
| 691 | ChannelType& G() { | ||
| 692 | return color[2]; | ||
| 693 | } | ||
| 694 | const ChannelType& B() const { | ||
| 695 | return color[3]; | ||
| 696 | } | ||
| 697 | ChannelType& B() { | ||
| 698 | return color[3]; | ||
| 699 | } | ||
| 700 | const ChannelType& Component(u32 idx) const { | ||
| 701 | return color[idx]; | ||
| 702 | } | ||
| 703 | ChannelType& Component(u32 idx) { | ||
| 704 | return color[idx]; | ||
| 705 | } | ||
| 706 | |||
| 707 | void GetBitDepth(u8 (&outDepth)[4]) const { | ||
| 708 | for (s32 i = 0; i < 4; i++) { | ||
| 709 | outDepth[i] = m_BitDepth[i]; | ||
| 710 | } | ||
| 711 | } | ||
| 712 | |||
| 713 | // Take all of the components, transform them to their 8-bit variants, | ||
| 714 | // and then pack each channel into an R8G8B8A8 32-bit integer. We assume | ||
| 715 | // that the architecture is little-endian, so the alpha channel will end | ||
| 716 | // up in the most-significant byte. | ||
| 717 | u32 Pack() const { | ||
| 718 | Pixel eightBit(*this); | ||
| 719 | eightBit.ChangeBitDepth(); | ||
| 720 | |||
| 721 | u32 r = 0; | ||
| 722 | r |= eightBit.A(); | ||
| 723 | r <<= 8; | ||
| 724 | r |= eightBit.B(); | ||
| 725 | r <<= 8; | ||
| 726 | r |= eightBit.G(); | ||
| 727 | r <<= 8; | ||
| 728 | r |= eightBit.R(); | ||
| 729 | return r; | ||
| 730 | } | ||
| 731 | |||
| 732 | // Clamps the pixel to the range [0,255] | ||
| 733 | void ClampByte() { | ||
| 734 | for (u32 i = 0; i < 4; i++) { | ||
| 735 | color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]); | ||
| 736 | } | ||
| 737 | } | ||
| 738 | |||
| 739 | void MakeOpaque() { | ||
| 740 | A() = 255; | ||
| 741 | } | ||
| 742 | }; | ||
| 743 | |||
| 744 | static void DecodeColorValues(u32* out, std::span<u8> data, const u32* modes, const u32 nPartitions, | ||
| 745 | const u32 nBitsForColorData) { | ||
| 746 | // First figure out how many color values we have | ||
| 747 | u32 nValues = 0; | ||
| 748 | for (u32 i = 0; i < nPartitions; i++) { | ||
| 749 | nValues += ((modes[i] >> 2) + 1) << 1; | ||
| 750 | } | ||
| 751 | |||
| 752 | // Then based on the number of values and the remaining number of bits, | ||
| 753 | // figure out the max value for each of them... | ||
| 754 | u32 range = 256; | ||
| 755 | while (--range > 0) { | ||
| 756 | IntegerEncodedValue val = EncodingsValues[range]; | ||
| 757 | u32 bitLength = val.GetBitLength(nValues); | ||
| 758 | if (bitLength <= nBitsForColorData) { | ||
| 759 | // Find the smallest possible range that matches the given encoding | ||
| 760 | while (--range > 0) { | ||
| 761 | IntegerEncodedValue newval = EncodingsValues[range]; | ||
| 762 | if (!newval.MatchesEncoding(val)) { | ||
| 763 | break; | ||
| 764 | } | ||
| 765 | } | ||
| 766 | |||
| 767 | // Return to last matching range. | ||
| 768 | range++; | ||
| 769 | break; | ||
| 770 | } | ||
| 771 | } | ||
| 772 | |||
| 773 | // We now have enough to decode our integer sequence. | ||
| 774 | IntegerEncodedVector decodedColorValues; | ||
| 775 | |||
| 776 | InputBitStream colorStream(data, 0); | ||
| 777 | DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); | ||
| 778 | |||
| 779 | // Once we have the decoded values, we need to dequantize them to the 0-255 range | ||
| 780 | // This procedure is outlined in ASTC spec C.2.13 | ||
| 781 | u32 outIdx = 0; | ||
| 782 | for (auto itr = decodedColorValues.begin(); itr != decodedColorValues.end(); ++itr) { | ||
| 783 | // Have we already decoded all that we need? | ||
| 784 | if (outIdx >= nValues) { | ||
| 785 | break; | ||
| 786 | } | ||
| 787 | |||
| 788 | const IntegerEncodedValue& val = *itr; | ||
| 789 | u32 bitlen = val.num_bits; | ||
| 790 | u32 bitval = val.bit_value; | ||
| 791 | |||
| 792 | assert(bitlen >= 1); | ||
| 793 | |||
| 794 | u32 A = 0, B = 0, C = 0, D = 0; | ||
| 795 | // A is just the lsb replicated 9 times. | ||
| 796 | A = ReplicateBitTo9(bitval & 1); | ||
| 797 | |||
| 798 | switch (val.encoding) { | ||
| 799 | // Replicate bits | ||
| 800 | case IntegerEncoding::JustBits: | ||
| 801 | out[outIdx++] = FastReplicateTo8(bitval, bitlen); | ||
| 802 | break; | ||
| 803 | |||
| 804 | // Use algorithm in C.2.13 | ||
| 805 | case IntegerEncoding::Trit: { | ||
| 806 | |||
| 807 | D = val.trit_value; | ||
| 808 | |||
| 809 | switch (bitlen) { | ||
| 810 | case 1: { | ||
| 811 | C = 204; | ||
| 812 | } break; | ||
| 813 | |||
| 814 | case 2: { | ||
| 815 | C = 93; | ||
| 816 | // B = b000b0bb0 | ||
| 817 | u32 b = (bitval >> 1) & 1; | ||
| 818 | B = (b << 8) | (b << 4) | (b << 2) | (b << 1); | ||
| 819 | } break; | ||
| 820 | |||
| 821 | case 3: { | ||
| 822 | C = 44; | ||
| 823 | // B = cb000cbcb | ||
| 824 | u32 cb = (bitval >> 1) & 3; | ||
| 825 | B = (cb << 7) | (cb << 2) | cb; | ||
| 826 | } break; | ||
| 827 | |||
| 828 | case 4: { | ||
| 829 | C = 22; | ||
| 830 | // B = dcb000dcb | ||
| 831 | u32 dcb = (bitval >> 1) & 7; | ||
| 832 | B = (dcb << 6) | dcb; | ||
| 833 | } break; | ||
| 834 | |||
| 835 | case 5: { | ||
| 836 | C = 11; | ||
| 837 | // B = edcb000ed | ||
| 838 | u32 edcb = (bitval >> 1) & 0xF; | ||
| 839 | B = (edcb << 5) | (edcb >> 2); | ||
| 840 | } break; | ||
| 841 | |||
| 842 | case 6: { | ||
| 843 | C = 5; | ||
| 844 | // B = fedcb000f | ||
| 845 | u32 fedcb = (bitval >> 1) & 0x1F; | ||
| 846 | B = (fedcb << 4) | (fedcb >> 4); | ||
| 847 | } break; | ||
| 848 | |||
| 849 | default: | ||
| 850 | assert(false && "Unsupported trit encoding for color values!"); | ||
| 851 | break; | ||
| 852 | } // switch(bitlen) | ||
| 853 | } // case IntegerEncoding::Trit | ||
| 854 | break; | ||
| 855 | |||
| 856 | case IntegerEncoding::Quint: { | ||
| 857 | |||
| 858 | D = val.quint_value; | ||
| 859 | |||
| 860 | switch (bitlen) { | ||
| 861 | case 1: { | ||
| 862 | C = 113; | ||
| 863 | } break; | ||
| 864 | |||
| 865 | case 2: { | ||
| 866 | C = 54; | ||
| 867 | // B = b0000bb00 | ||
| 868 | u32 b = (bitval >> 1) & 1; | ||
| 869 | B = (b << 8) | (b << 3) | (b << 2); | ||
| 870 | } break; | ||
| 871 | |||
| 872 | case 3: { | ||
| 873 | C = 26; | ||
| 874 | // B = cb0000cbc | ||
| 875 | u32 cb = (bitval >> 1) & 3; | ||
| 876 | B = (cb << 7) | (cb << 1) | (cb >> 1); | ||
| 877 | } break; | ||
| 878 | |||
| 879 | case 4: { | ||
| 880 | C = 13; | ||
| 881 | // B = dcb0000dc | ||
| 882 | u32 dcb = (bitval >> 1) & 7; | ||
| 883 | B = (dcb << 6) | (dcb >> 1); | ||
| 884 | } break; | ||
| 885 | |||
| 886 | case 5: { | ||
| 887 | C = 6; | ||
| 888 | // B = edcb0000e | ||
| 889 | u32 edcb = (bitval >> 1) & 0xF; | ||
| 890 | B = (edcb << 5) | (edcb >> 3); | ||
| 891 | } break; | ||
| 892 | |||
| 893 | default: | ||
| 894 | assert(false && "Unsupported quint encoding for color values!"); | ||
| 895 | break; | ||
| 896 | } // switch(bitlen) | ||
| 897 | } // case IntegerEncoding::Quint | ||
| 898 | break; | ||
| 899 | } // switch(val.encoding) | ||
| 900 | |||
| 901 | if (val.encoding != IntegerEncoding::JustBits) { | ||
| 902 | u32 T = D * C + B; | ||
| 903 | T ^= A; | ||
| 904 | T = (A & 0x80) | (T >> 2); | ||
| 905 | out[outIdx++] = T; | ||
| 906 | } | ||
| 907 | } | ||
| 908 | |||
| 909 | // Make sure that each of our values is in the proper range... | ||
| 910 | for (u32 i = 0; i < nValues; i++) { | ||
| 911 | assert(out[i] <= 255); | ||
| 912 | } | ||
| 913 | } | ||
| 914 | |||
| 915 | static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) { | ||
| 916 | u32 bitval = val.bit_value; | ||
| 917 | u32 bitlen = val.num_bits; | ||
| 918 | |||
| 919 | u32 A = ReplicateBitTo7(bitval & 1); | ||
| 920 | u32 B = 0, C = 0, D = 0; | ||
| 921 | |||
| 922 | u32 result = 0; | ||
| 923 | switch (val.encoding) { | ||
| 924 | case IntegerEncoding::JustBits: | ||
| 925 | result = FastReplicateTo6(bitval, bitlen); | ||
| 926 | break; | ||
| 927 | |||
| 928 | case IntegerEncoding::Trit: { | ||
| 929 | D = val.trit_value; | ||
| 930 | assert(D < 3); | ||
| 931 | |||
| 932 | switch (bitlen) { | ||
| 933 | case 0: { | ||
| 934 | u32 results[3] = {0, 32, 63}; | ||
| 935 | result = results[D]; | ||
| 936 | } break; | ||
| 937 | |||
| 938 | case 1: { | ||
| 939 | C = 50; | ||
| 940 | } break; | ||
| 941 | |||
| 942 | case 2: { | ||
| 943 | C = 23; | ||
| 944 | u32 b = (bitval >> 1) & 1; | ||
| 945 | B = (b << 6) | (b << 2) | b; | ||
| 946 | } break; | ||
| 947 | |||
| 948 | case 3: { | ||
| 949 | C = 11; | ||
| 950 | u32 cb = (bitval >> 1) & 3; | ||
| 951 | B = (cb << 5) | cb; | ||
| 952 | } break; | ||
| 953 | |||
| 954 | default: | ||
| 955 | assert(false && "Invalid trit encoding for texel weight"); | ||
| 956 | break; | ||
| 957 | } | ||
| 958 | } break; | ||
| 959 | |||
| 960 | case IntegerEncoding::Quint: { | ||
| 961 | D = val.quint_value; | ||
| 962 | assert(D < 5); | ||
| 963 | |||
| 964 | switch (bitlen) { | ||
| 965 | case 0: { | ||
| 966 | u32 results[5] = {0, 16, 32, 47, 63}; | ||
| 967 | result = results[D]; | ||
| 968 | } break; | ||
| 969 | |||
| 970 | case 1: { | ||
| 971 | C = 28; | ||
| 972 | } break; | ||
| 973 | |||
| 974 | case 2: { | ||
| 975 | C = 13; | ||
| 976 | u32 b = (bitval >> 1) & 1; | ||
| 977 | B = (b << 6) | (b << 1); | ||
| 978 | } break; | ||
| 979 | |||
| 980 | default: | ||
| 981 | assert(false && "Invalid quint encoding for texel weight"); | ||
| 982 | break; | ||
| 983 | } | ||
| 984 | } break; | ||
| 985 | } | ||
| 986 | |||
| 987 | if (val.encoding != IntegerEncoding::JustBits && bitlen > 0) { | ||
| 988 | // Decode the value... | ||
| 989 | result = D * C + B; | ||
| 990 | result ^= A; | ||
| 991 | result = (A & 0x20) | (result >> 2); | ||
| 992 | } | ||
| 993 | |||
| 994 | assert(result < 64); | ||
| 995 | |||
| 996 | // Change from [0,63] to [0,64] | ||
| 997 | if (result > 32) { | ||
| 998 | result += 1; | ||
| 999 | } | ||
| 1000 | |||
| 1001 | return result; | ||
| 1002 | } | ||
| 1003 | |||
| 1004 | static void UnquantizeTexelWeights(u32 out[2][144], const IntegerEncodedVector& weights, | ||
| 1005 | const TexelWeightParams& params, const u32 blockWidth, | ||
| 1006 | const u32 blockHeight) { | ||
| 1007 | u32 weightIdx = 0; | ||
| 1008 | u32 unquantized[2][144]; | ||
| 1009 | |||
| 1010 | for (auto itr = weights.begin(); itr != weights.end(); ++itr) { | ||
| 1011 | unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr); | ||
| 1012 | |||
| 1013 | if (params.m_bDualPlane) { | ||
| 1014 | ++itr; | ||
| 1015 | unquantized[1][weightIdx] = UnquantizeTexelWeight(*itr); | ||
| 1016 | if (itr == weights.end()) { | ||
| 1017 | break; | ||
| 1018 | } | ||
| 1019 | } | ||
| 1020 | |||
| 1021 | if (++weightIdx >= (params.m_Width * params.m_Height)) | ||
| 1022 | break; | ||
| 1023 | } | ||
| 1024 | |||
| 1025 | // Do infill if necessary (Section C.2.18) ... | ||
| 1026 | u32 Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1); | ||
| 1027 | u32 Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1); | ||
| 1028 | |||
| 1029 | const u32 kPlaneScale = params.m_bDualPlane ? 2U : 1U; | ||
| 1030 | for (u32 plane = 0; plane < kPlaneScale; plane++) | ||
| 1031 | for (u32 t = 0; t < blockHeight; t++) | ||
| 1032 | for (u32 s = 0; s < blockWidth; s++) { | ||
| 1033 | u32 cs = Ds * s; | ||
| 1034 | u32 ct = Dt * t; | ||
| 1035 | |||
| 1036 | u32 gs = (cs * (params.m_Width - 1) + 32) >> 6; | ||
| 1037 | u32 gt = (ct * (params.m_Height - 1) + 32) >> 6; | ||
| 1038 | |||
| 1039 | u32 js = gs >> 4; | ||
| 1040 | u32 fs = gs & 0xF; | ||
| 1041 | |||
| 1042 | u32 jt = gt >> 4; | ||
| 1043 | u32 ft = gt & 0x0F; | ||
| 1044 | |||
| 1045 | u32 w11 = (fs * ft + 8) >> 4; | ||
| 1046 | u32 w10 = ft - w11; | ||
| 1047 | u32 w01 = fs - w11; | ||
| 1048 | u32 w00 = 16 - fs - ft + w11; | ||
| 1049 | |||
| 1050 | u32 v0 = js + jt * params.m_Width; | ||
| 1051 | |||
| 1052 | #define FIND_TEXEL(tidx, bidx) \ | ||
| 1053 | u32 p##bidx = 0; \ | ||
| 1054 | do { \ | ||
| 1055 | if ((tidx) < (params.m_Width * params.m_Height)) { \ | ||
| 1056 | p##bidx = unquantized[plane][(tidx)]; \ | ||
| 1057 | } \ | ||
| 1058 | } while (0) | ||
| 1059 | |||
| 1060 | FIND_TEXEL(v0, 00); | ||
| 1061 | FIND_TEXEL(v0 + 1, 01); | ||
| 1062 | FIND_TEXEL(v0 + params.m_Width, 10); | ||
| 1063 | FIND_TEXEL(v0 + params.m_Width + 1, 11); | ||
| 1064 | |||
| 1065 | #undef FIND_TEXEL | ||
| 1066 | |||
| 1067 | out[plane][t * blockWidth + s] = | ||
| 1068 | (p00 * w00 + p01 * w01 + p10 * w10 + p11 * w11 + 8) >> 4; | ||
| 1069 | } | ||
| 1070 | } | ||
| 1071 | |||
// Bit transfer as described in C.2.14: bit 7 of a replaces the top bit of b (after b is
// shifted right by one), and the remaining bits of a become a signed 6-bit value in
// the range [-32, 31].
static inline void BitTransferSigned(int& a, int& b) {
    b = (b >> 1) | (a & 0x80);
    a = (a >> 1) & 0x3F;
    // Bit 5 is the sign bit of the 6-bit result.
    if ((a & 0x20) != 0) {
        a -= 0x40;
    }
}
| 1081 | |||
| 1082 | // Adds more precision to the blue channel as described | ||
| 1083 | // in C.2.14 | ||
| 1084 | static inline Pixel BlueContract(s32 a, s32 r, s32 g, s32 b) { | ||
| 1085 | return Pixel(static_cast<s16>(a), static_cast<s16>((r + b) >> 1), | ||
| 1086 | static_cast<s16>((g + b) >> 1), static_cast<s16>(b)); | ||
| 1087 | } | ||
| 1088 | |||
| 1089 | // Partition selection functions as specified in | ||
| 1090 | // C.2.21 | ||
| 1091 | static inline u32 hash52(u32 p) { | ||
| 1092 | p ^= p >> 15; | ||
| 1093 | p -= p << 17; | ||
| 1094 | p += p << 7; | ||
| 1095 | p += p << 4; | ||
| 1096 | p ^= p >> 5; | ||
| 1097 | p += p << 16; | ||
| 1098 | p ^= p >> 7; | ||
| 1099 | p ^= p >> 3; | ||
| 1100 | p ^= p << 6; | ||
| 1101 | p ^= p >> 17; | ||
| 1102 | return p; | ||
| 1103 | } | ||
| 1104 | |||
| 1105 | static u32 SelectPartition(s32 seed, s32 x, s32 y, s32 z, s32 partitionCount, s32 smallBlock) { | ||
| 1106 | if (1 == partitionCount) | ||
| 1107 | return 0; | ||
| 1108 | |||
| 1109 | if (smallBlock) { | ||
| 1110 | x <<= 1; | ||
| 1111 | y <<= 1; | ||
| 1112 | z <<= 1; | ||
| 1113 | } | ||
| 1114 | |||
| 1115 | seed += (partitionCount - 1) * 1024; | ||
| 1116 | |||
| 1117 | u32 rnum = hash52(static_cast<u32>(seed)); | ||
| 1118 | u8 seed1 = static_cast<u8>(rnum & 0xF); | ||
| 1119 | u8 seed2 = static_cast<u8>((rnum >> 4) & 0xF); | ||
| 1120 | u8 seed3 = static_cast<u8>((rnum >> 8) & 0xF); | ||
| 1121 | u8 seed4 = static_cast<u8>((rnum >> 12) & 0xF); | ||
| 1122 | u8 seed5 = static_cast<u8>((rnum >> 16) & 0xF); | ||
| 1123 | u8 seed6 = static_cast<u8>((rnum >> 20) & 0xF); | ||
| 1124 | u8 seed7 = static_cast<u8>((rnum >> 24) & 0xF); | ||
| 1125 | u8 seed8 = static_cast<u8>((rnum >> 28) & 0xF); | ||
| 1126 | u8 seed9 = static_cast<u8>((rnum >> 18) & 0xF); | ||
| 1127 | u8 seed10 = static_cast<u8>((rnum >> 22) & 0xF); | ||
| 1128 | u8 seed11 = static_cast<u8>((rnum >> 26) & 0xF); | ||
| 1129 | u8 seed12 = static_cast<u8>(((rnum >> 30) | (rnum << 2)) & 0xF); | ||
| 1130 | |||
| 1131 | seed1 = static_cast<u8>(seed1 * seed1); | ||
| 1132 | seed2 = static_cast<u8>(seed2 * seed2); | ||
| 1133 | seed3 = static_cast<u8>(seed3 * seed3); | ||
| 1134 | seed4 = static_cast<u8>(seed4 * seed4); | ||
| 1135 | seed5 = static_cast<u8>(seed5 * seed5); | ||
| 1136 | seed6 = static_cast<u8>(seed6 * seed6); | ||
| 1137 | seed7 = static_cast<u8>(seed7 * seed7); | ||
| 1138 | seed8 = static_cast<u8>(seed8 * seed8); | ||
| 1139 | seed9 = static_cast<u8>(seed9 * seed9); | ||
| 1140 | seed10 = static_cast<u8>(seed10 * seed10); | ||
| 1141 | seed11 = static_cast<u8>(seed11 * seed11); | ||
| 1142 | seed12 = static_cast<u8>(seed12 * seed12); | ||
| 1143 | |||
| 1144 | s32 sh1, sh2, sh3; | ||
| 1145 | if (seed & 1) { | ||
| 1146 | sh1 = (seed & 2) ? 4 : 5; | ||
| 1147 | sh2 = (partitionCount == 3) ? 6 : 5; | ||
| 1148 | } else { | ||
| 1149 | sh1 = (partitionCount == 3) ? 6 : 5; | ||
| 1150 | sh2 = (seed & 2) ? 4 : 5; | ||
| 1151 | } | ||
| 1152 | sh3 = (seed & 0x10) ? sh1 : sh2; | ||
| 1153 | |||
| 1154 | seed1 = static_cast<u8>(seed1 >> sh1); | ||
| 1155 | seed2 = static_cast<u8>(seed2 >> sh2); | ||
| 1156 | seed3 = static_cast<u8>(seed3 >> sh1); | ||
| 1157 | seed4 = static_cast<u8>(seed4 >> sh2); | ||
| 1158 | seed5 = static_cast<u8>(seed5 >> sh1); | ||
| 1159 | seed6 = static_cast<u8>(seed6 >> sh2); | ||
| 1160 | seed7 = static_cast<u8>(seed7 >> sh1); | ||
| 1161 | seed8 = static_cast<u8>(seed8 >> sh2); | ||
| 1162 | seed9 = static_cast<u8>(seed9 >> sh3); | ||
| 1163 | seed10 = static_cast<u8>(seed10 >> sh3); | ||
| 1164 | seed11 = static_cast<u8>(seed11 >> sh3); | ||
| 1165 | seed12 = static_cast<u8>(seed12 >> sh3); | ||
| 1166 | |||
| 1167 | s32 a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); | ||
| 1168 | s32 b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); | ||
| 1169 | s32 c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); | ||
| 1170 | s32 d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); | ||
| 1171 | |||
| 1172 | a &= 0x3F; | ||
| 1173 | b &= 0x3F; | ||
| 1174 | c &= 0x3F; | ||
| 1175 | d &= 0x3F; | ||
| 1176 | |||
| 1177 | if (partitionCount < 4) | ||
| 1178 | d = 0; | ||
| 1179 | if (partitionCount < 3) | ||
| 1180 | c = 0; | ||
| 1181 | |||
| 1182 | if (a >= b && a >= c && a >= d) | ||
| 1183 | return 0; | ||
| 1184 | else if (b >= c && b >= d) | ||
| 1185 | return 1; | ||
| 1186 | else if (c >= d) | ||
| 1187 | return 2; | ||
| 1188 | return 3; | ||
| 1189 | } | ||
| 1190 | |||
| 1191 | static inline u32 Select2DPartition(s32 seed, s32 x, s32 y, s32 partitionCount, s32 smallBlock) { | ||
| 1192 | return SelectPartition(seed, x, y, 0, partitionCount, smallBlock); | ||
| 1193 | } | ||
| 1194 | |||
| 1195 | // Section C.2.14 | ||
| 1196 | static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const u32*& colorValues, | ||
| 1197 | u32 colorEndpointMode) { | ||
| 1198 | #define READ_UINT_VALUES(N) \ | ||
| 1199 | u32 v[N]; \ | ||
| 1200 | for (u32 i = 0; i < N; i++) { \ | ||
| 1201 | v[i] = *(colorValues++); \ | ||
| 1202 | } | ||
| 1203 | |||
| 1204 | #define READ_INT_VALUES(N) \ | ||
| 1205 | s32 v[N]; \ | ||
| 1206 | for (u32 i = 0; i < N; i++) { \ | ||
| 1207 | v[i] = static_cast<int>(*(colorValues++)); \ | ||
| 1208 | } | ||
| 1209 | |||
| 1210 | switch (colorEndpointMode) { | ||
| 1211 | case 0: { | ||
| 1212 | READ_UINT_VALUES(2) | ||
| 1213 | ep1 = Pixel(0xFF, v[0], v[0], v[0]); | ||
| 1214 | ep2 = Pixel(0xFF, v[1], v[1], v[1]); | ||
| 1215 | } break; | ||
| 1216 | |||
| 1217 | case 1: { | ||
| 1218 | READ_UINT_VALUES(2) | ||
| 1219 | u32 L0 = (v[0] >> 2) | (v[1] & 0xC0); | ||
| 1220 | u32 L1 = std::min(L0 + (v[1] & 0x3F), 0xFFU); | ||
| 1221 | ep1 = Pixel(0xFF, L0, L0, L0); | ||
| 1222 | ep2 = Pixel(0xFF, L1, L1, L1); | ||
| 1223 | } break; | ||
| 1224 | |||
| 1225 | case 4: { | ||
| 1226 | READ_UINT_VALUES(4) | ||
| 1227 | ep1 = Pixel(v[2], v[0], v[0], v[0]); | ||
| 1228 | ep2 = Pixel(v[3], v[1], v[1], v[1]); | ||
| 1229 | } break; | ||
| 1230 | |||
| 1231 | case 5: { | ||
| 1232 | READ_INT_VALUES(4) | ||
| 1233 | BitTransferSigned(v[1], v[0]); | ||
| 1234 | BitTransferSigned(v[3], v[2]); | ||
| 1235 | ep1 = Pixel(v[2], v[0], v[0], v[0]); | ||
| 1236 | ep2 = Pixel(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1]); | ||
| 1237 | ep1.ClampByte(); | ||
| 1238 | ep2.ClampByte(); | ||
| 1239 | } break; | ||
| 1240 | |||
| 1241 | case 6: { | ||
| 1242 | READ_UINT_VALUES(4) | ||
| 1243 | ep1 = Pixel(0xFF, v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8); | ||
| 1244 | ep2 = Pixel(0xFF, v[0], v[1], v[2]); | ||
| 1245 | } break; | ||
| 1246 | |||
| 1247 | case 8: { | ||
| 1248 | READ_UINT_VALUES(6) | ||
| 1249 | if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) { | ||
| 1250 | ep1 = Pixel(0xFF, v[0], v[2], v[4]); | ||
| 1251 | ep2 = Pixel(0xFF, v[1], v[3], v[5]); | ||
| 1252 | } else { | ||
| 1253 | ep1 = BlueContract(0xFF, v[1], v[3], v[5]); | ||
| 1254 | ep2 = BlueContract(0xFF, v[0], v[2], v[4]); | ||
| 1255 | } | ||
| 1256 | } break; | ||
| 1257 | |||
| 1258 | case 9: { | ||
| 1259 | READ_INT_VALUES(6) | ||
| 1260 | BitTransferSigned(v[1], v[0]); | ||
| 1261 | BitTransferSigned(v[3], v[2]); | ||
| 1262 | BitTransferSigned(v[5], v[4]); | ||
| 1263 | if (v[1] + v[3] + v[5] >= 0) { | ||
| 1264 | ep1 = Pixel(0xFF, v[0], v[2], v[4]); | ||
| 1265 | ep2 = Pixel(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]); | ||
| 1266 | } else { | ||
| 1267 | ep1 = BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]); | ||
| 1268 | ep2 = BlueContract(0xFF, v[0], v[2], v[4]); | ||
| 1269 | } | ||
| 1270 | ep1.ClampByte(); | ||
| 1271 | ep2.ClampByte(); | ||
| 1272 | } break; | ||
| 1273 | |||
| 1274 | case 10: { | ||
| 1275 | READ_UINT_VALUES(6) | ||
| 1276 | ep1 = Pixel(v[4], v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8); | ||
| 1277 | ep2 = Pixel(v[5], v[0], v[1], v[2]); | ||
| 1278 | } break; | ||
| 1279 | |||
| 1280 | case 12: { | ||
| 1281 | READ_UINT_VALUES(8) | ||
| 1282 | if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) { | ||
| 1283 | ep1 = Pixel(v[6], v[0], v[2], v[4]); | ||
| 1284 | ep2 = Pixel(v[7], v[1], v[3], v[5]); | ||
| 1285 | } else { | ||
| 1286 | ep1 = BlueContract(v[7], v[1], v[3], v[5]); | ||
| 1287 | ep2 = BlueContract(v[6], v[0], v[2], v[4]); | ||
| 1288 | } | ||
| 1289 | } break; | ||
| 1290 | |||
| 1291 | case 13: { | ||
| 1292 | READ_INT_VALUES(8) | ||
| 1293 | BitTransferSigned(v[1], v[0]); | ||
| 1294 | BitTransferSigned(v[3], v[2]); | ||
| 1295 | BitTransferSigned(v[5], v[4]); | ||
| 1296 | BitTransferSigned(v[7], v[6]); | ||
| 1297 | if (v[1] + v[3] + v[5] >= 0) { | ||
| 1298 | ep1 = Pixel(v[6], v[0], v[2], v[4]); | ||
| 1299 | ep2 = Pixel(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5]); | ||
| 1300 | } else { | ||
| 1301 | ep1 = BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5]); | ||
| 1302 | ep2 = BlueContract(v[6], v[0], v[2], v[4]); | ||
| 1303 | } | ||
| 1304 | ep1.ClampByte(); | ||
| 1305 | ep2.ClampByte(); | ||
| 1306 | } break; | ||
| 1307 | |||
| 1308 | default: | ||
| 1309 | assert(false && "Unsupported color endpoint mode (is it HDR?)"); | ||
| 1310 | break; | ||
| 1311 | } | ||
| 1312 | |||
| 1313 | #undef READ_UINT_VALUES | ||
| 1314 | #undef READ_INT_VALUES | ||
| 1315 | } | ||
| 1316 | |||
| 1317 | static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth, | ||
| 1318 | const u32 blockHeight, std::span<u32, 12 * 12> outBuf) { | ||
| 1319 | InputBitStream strm(inBuf); | ||
| 1320 | TexelWeightParams weightParams = DecodeBlockInfo(strm); | ||
| 1321 | |||
| 1322 | // Was there an error? | ||
| 1323 | if (weightParams.m_bError) { | ||
| 1324 | assert(false && "Invalid block mode"); | ||
| 1325 | FillError(outBuf, blockWidth, blockHeight); | ||
| 1326 | return; | ||
| 1327 | } | ||
| 1328 | |||
| 1329 | if (weightParams.m_bVoidExtentLDR) { | ||
| 1330 | FillVoidExtentLDR(strm, outBuf, blockWidth, blockHeight); | ||
| 1331 | return; | ||
| 1332 | } | ||
| 1333 | |||
| 1334 | if (weightParams.m_bVoidExtentHDR) { | ||
| 1335 | assert(false && "HDR void extent blocks are unsupported!"); | ||
| 1336 | FillError(outBuf, blockWidth, blockHeight); | ||
| 1337 | return; | ||
| 1338 | } | ||
| 1339 | |||
| 1340 | if (weightParams.m_Width > blockWidth) { | ||
| 1341 | assert(false && "Texel weight grid width should be smaller than block width"); | ||
| 1342 | FillError(outBuf, blockWidth, blockHeight); | ||
| 1343 | return; | ||
| 1344 | } | ||
| 1345 | |||
| 1346 | if (weightParams.m_Height > blockHeight) { | ||
| 1347 | assert(false && "Texel weight grid height should be smaller than block height"); | ||
| 1348 | FillError(outBuf, blockWidth, blockHeight); | ||
| 1349 | return; | ||
| 1350 | } | ||
| 1351 | |||
| 1352 | // Read num partitions | ||
| 1353 | u32 nPartitions = strm.ReadBits<2>() + 1; | ||
| 1354 | assert(nPartitions <= 4); | ||
| 1355 | |||
| 1356 | if (nPartitions == 4 && weightParams.m_bDualPlane) { | ||
| 1357 | assert(false && "Dual plane mode is incompatible with four partition blocks"); | ||
| 1358 | FillError(outBuf, blockWidth, blockHeight); | ||
| 1359 | return; | ||
| 1360 | } | ||
| 1361 | |||
| 1362 | // Based on the number of partitions, read the color endpoint mode for | ||
| 1363 | // each partition. | ||
| 1364 | |||
| 1365 | // Determine partitions, partition index, and color endpoint modes | ||
| 1366 | s32 planeIdx = -1; | ||
| 1367 | u32 partitionIndex; | ||
| 1368 | u32 colorEndpointMode[4] = {0, 0, 0, 0}; | ||
| 1369 | |||
| 1370 | // Define color data. | ||
| 1371 | u8 colorEndpointData[16]; | ||
| 1372 | memset(colorEndpointData, 0, sizeof(colorEndpointData)); | ||
| 1373 | OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); | ||
| 1374 | |||
| 1375 | // Read extra config data... | ||
| 1376 | u32 baseCEM = 0; | ||
| 1377 | if (nPartitions == 1) { | ||
| 1378 | colorEndpointMode[0] = strm.ReadBits<4>(); | ||
| 1379 | partitionIndex = 0; | ||
| 1380 | } else { | ||
| 1381 | partitionIndex = strm.ReadBits<10>(); | ||
| 1382 | baseCEM = strm.ReadBits<6>(); | ||
| 1383 | } | ||
| 1384 | u32 baseMode = (baseCEM & 3); | ||
| 1385 | |||
| 1386 | // Remaining bits are color endpoint data... | ||
| 1387 | u32 nWeightBits = weightParams.GetPackedBitSize(); | ||
| 1388 | s32 remainingBits = 128 - nWeightBits - static_cast<int>(strm.GetBitsRead()); | ||
| 1389 | |||
| 1390 | // Consider extra bits prior to texel data... | ||
| 1391 | u32 extraCEMbits = 0; | ||
| 1392 | if (baseMode) { | ||
| 1393 | switch (nPartitions) { | ||
| 1394 | case 2: | ||
| 1395 | extraCEMbits += 2; | ||
| 1396 | break; | ||
| 1397 | case 3: | ||
| 1398 | extraCEMbits += 5; | ||
| 1399 | break; | ||
| 1400 | case 4: | ||
| 1401 | extraCEMbits += 8; | ||
| 1402 | break; | ||
| 1403 | default: | ||
| 1404 | assert(false); | ||
| 1405 | break; | ||
| 1406 | } | ||
| 1407 | } | ||
| 1408 | remainingBits -= extraCEMbits; | ||
| 1409 | |||
| 1410 | // Do we have a dual plane situation? | ||
| 1411 | u32 planeSelectorBits = 0; | ||
| 1412 | if (weightParams.m_bDualPlane) { | ||
| 1413 | planeSelectorBits = 2; | ||
| 1414 | } | ||
| 1415 | remainingBits -= planeSelectorBits; | ||
| 1416 | |||
| 1417 | // Read color data... | ||
| 1418 | u32 colorDataBits = remainingBits; | ||
| 1419 | while (remainingBits > 0) { | ||
| 1420 | u32 nb = std::min(remainingBits, 8); | ||
| 1421 | u32 b = strm.ReadBits(nb); | ||
| 1422 | colorEndpointStream.WriteBits(b, nb); | ||
| 1423 | remainingBits -= 8; | ||
| 1424 | } | ||
| 1425 | |||
| 1426 | // Read the plane selection bits | ||
| 1427 | planeIdx = strm.ReadBits(planeSelectorBits); | ||
| 1428 | |||
| 1429 | // Read the rest of the CEM | ||
| 1430 | if (baseMode) { | ||
| 1431 | u32 extraCEM = strm.ReadBits(extraCEMbits); | ||
| 1432 | u32 CEM = (extraCEM << 6) | baseCEM; | ||
| 1433 | CEM >>= 2; | ||
| 1434 | |||
| 1435 | bool C[4] = {0}; | ||
| 1436 | for (u32 i = 0; i < nPartitions; i++) { | ||
| 1437 | C[i] = CEM & 1; | ||
| 1438 | CEM >>= 1; | ||
| 1439 | } | ||
| 1440 | |||
| 1441 | u8 M[4] = {0}; | ||
| 1442 | for (u32 i = 0; i < nPartitions; i++) { | ||
| 1443 | M[i] = CEM & 3; | ||
| 1444 | CEM >>= 2; | ||
| 1445 | assert(M[i] <= 3); | ||
| 1446 | } | ||
| 1447 | |||
| 1448 | for (u32 i = 0; i < nPartitions; i++) { | ||
| 1449 | colorEndpointMode[i] = baseMode; | ||
| 1450 | if (!(C[i])) | ||
| 1451 | colorEndpointMode[i] -= 1; | ||
| 1452 | colorEndpointMode[i] <<= 2; | ||
| 1453 | colorEndpointMode[i] |= M[i]; | ||
| 1454 | } | ||
| 1455 | } else if (nPartitions > 1) { | ||
| 1456 | u32 CEM = baseCEM >> 2; | ||
| 1457 | for (u32 i = 0; i < nPartitions; i++) { | ||
| 1458 | colorEndpointMode[i] = CEM; | ||
| 1459 | } | ||
| 1460 | } | ||
| 1461 | |||
| 1462 | // Make sure everything up till here is sane. | ||
| 1463 | for (u32 i = 0; i < nPartitions; i++) { | ||
| 1464 | assert(colorEndpointMode[i] < 16); | ||
| 1465 | } | ||
| 1466 | assert(strm.GetBitsRead() + weightParams.GetPackedBitSize() == 128); | ||
| 1467 | |||
| 1468 | // Decode both color data and texel weight data | ||
| 1469 | u32 colorValues[32]; // Four values, two endpoints, four maximum partitions | ||
| 1470 | DecodeColorValues(colorValues, colorEndpointData, colorEndpointMode, nPartitions, | ||
| 1471 | colorDataBits); | ||
| 1472 | |||
| 1473 | Pixel endpoints[4][2]; | ||
| 1474 | const u32* colorValuesPtr = colorValues; | ||
| 1475 | for (u32 i = 0; i < nPartitions; i++) { | ||
| 1476 | ComputeEndpoints(endpoints[i][0], endpoints[i][1], colorValuesPtr, colorEndpointMode[i]); | ||
| 1477 | } | ||
| 1478 | |||
| 1479 | // Read the texel weight data.. | ||
| 1480 | std::array<u8, 16> texelWeightData; | ||
| 1481 | std::ranges::copy(inBuf, texelWeightData.begin()); | ||
| 1482 | |||
| 1483 | // Reverse everything | ||
| 1484 | for (u32 i = 0; i < 8; i++) { | ||
| 1485 | // Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits | ||
| 1486 | #define REVERSE_BYTE(b) (((b)*0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32 | ||
| 1487 | u8 a = static_cast<u8>(REVERSE_BYTE(texelWeightData[i])); | ||
| 1488 | u8 b = static_cast<u8>(REVERSE_BYTE(texelWeightData[15 - i])); | ||
| 1489 | #undef REVERSE_BYTE | ||
| 1490 | |||
| 1491 | texelWeightData[i] = b; | ||
| 1492 | texelWeightData[15 - i] = a; | ||
| 1493 | } | ||
| 1494 | |||
| 1495 | // Make sure that higher non-texel bits are set to zero | ||
| 1496 | const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; | ||
| 1497 | if (clearByteStart > 0 && clearByteStart <= texelWeightData.size()) { | ||
| 1498 | texelWeightData[clearByteStart - 1] &= | ||
| 1499 | static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); | ||
| 1500 | std::memset(texelWeightData.data() + clearByteStart, 0, | ||
| 1501 | std::min(16U - clearByteStart, 16U)); | ||
| 1502 | } | ||
| 1503 | |||
| 1504 | IntegerEncodedVector texelWeightValues; | ||
| 1505 | |||
| 1506 | InputBitStream weightStream(texelWeightData); | ||
| 1507 | |||
| 1508 | DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight, | ||
| 1509 | weightParams.GetNumWeightValues()); | ||
| 1510 | |||
| 1511 | // Blocks can be at most 12x12, so we can have as many as 144 weights | ||
| 1512 | u32 weights[2][144]; | ||
| 1513 | UnquantizeTexelWeights(weights, texelWeightValues, weightParams, blockWidth, blockHeight); | ||
| 1514 | |||
| 1515 | // Now that we have endpoints and weights, we can interpolate and generate | ||
| 1516 | // the proper decoding... | ||
| 1517 | for (u32 j = 0; j < blockHeight; j++) | ||
| 1518 | for (u32 i = 0; i < blockWidth; i++) { | ||
| 1519 | u32 partition = Select2DPartition(partitionIndex, i, j, nPartitions, | ||
| 1520 | (blockHeight * blockWidth) < 32); | ||
| 1521 | assert(partition < nPartitions); | ||
| 1522 | |||
| 1523 | Pixel p; | ||
| 1524 | for (u32 c = 0; c < 4; c++) { | ||
| 1525 | u32 C0 = endpoints[partition][0].Component(c); | ||
| 1526 | C0 = ReplicateByteTo16(C0); | ||
| 1527 | u32 C1 = endpoints[partition][1].Component(c); | ||
| 1528 | C1 = ReplicateByteTo16(C1); | ||
| 1529 | |||
| 1530 | u32 plane = 0; | ||
| 1531 | if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { | ||
| 1532 | plane = 1; | ||
| 1533 | } | ||
| 1534 | |||
| 1535 | u32 weight = weights[plane][j * blockWidth + i]; | ||
| 1536 | u32 C = (C0 * (64 - weight) + C1 * weight + 32) / 64; | ||
| 1537 | if (C == 65535) { | ||
| 1538 | p.Component(c) = 255; | ||
| 1539 | } else { | ||
| 1540 | double Cf = static_cast<double>(C); | ||
| 1541 | p.Component(c) = static_cast<u16>(255.0 * (Cf / 65536.0) + 0.5); | ||
| 1542 | } | ||
| 1543 | } | ||
| 1544 | |||
| 1545 | outBuf[j * blockWidth + i] = p.Pack(); | ||
| 1546 | } | ||
| 1547 | } | ||
| 1548 | |||
| 1549 | void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, | ||
| 1550 | uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) { | ||
| 1551 | u32 block_index = 0; | ||
| 1552 | std::size_t depth_offset = 0; | ||
| 1553 | for (u32 z = 0; z < depth; z++) { | ||
| 1554 | for (u32 y = 0; y < height; y += block_height) { | ||
| 1555 | for (u32 x = 0; x < width; x += block_width) { | ||
| 1556 | const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)}; | ||
| 1557 | |||
| 1558 | // Blocks can be at most 12x12 | ||
| 1559 | std::array<u32, 12 * 12> uncompData; | ||
| 1560 | DecompressBlock(blockPtr, block_width, block_height, uncompData); | ||
| 1561 | |||
| 1562 | u32 decompWidth = std::min(block_width, width - x); | ||
| 1563 | u32 decompHeight = std::min(block_height, height - y); | ||
| 1564 | |||
| 1565 | const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4); | ||
| 1566 | for (u32 jj = 0; jj < decompHeight; jj++) { | ||
| 1567 | std::memcpy(outRow.data() + jj * width * 4, | ||
| 1568 | uncompData.data() + jj * block_width, decompWidth * 4); | ||
| 1569 | } | ||
| 1570 | ++block_index; | ||
| 1571 | } | ||
| 1572 | } | ||
| 1573 | depth_offset += height * width * 4; | ||
| 1574 | } | ||
| 1575 | } | ||
| 1576 | |||
| 1577 | } // namespace Tegra::Texture::ASTC | ||
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index c1c73fda5..c1c37dfe7 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h | |||
| @@ -129,4 +129,7 @@ struct AstcBufferData { | |||
| 129 | decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE; | 129 | decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE; |
| 130 | } constexpr ASTC_BUFFER_DATA; | 130 | } constexpr ASTC_BUFFER_DATA; |
| 131 | 131 | ||
| 132 | void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, | ||
| 133 | uint32_t block_width, uint32_t block_height, std::span<uint8_t> output); | ||
| 134 | |||
| 132 | } // namespace Tegra::Texture::ASTC | 135 | } // namespace Tegra::Texture::ASTC |
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 712319783..916a22724 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -809,6 +809,7 @@ void Config::ReadRendererValues() { | |||
| 809 | QStringLiteral("use_asynchronous_gpu_emulation"), true); | 809 | QStringLiteral("use_asynchronous_gpu_emulation"), true); |
| 810 | ReadSettingGlobal(Settings::values.use_nvdec_emulation, QStringLiteral("use_nvdec_emulation"), | 810 | ReadSettingGlobal(Settings::values.use_nvdec_emulation, QStringLiteral("use_nvdec_emulation"), |
| 811 | true); | 811 | true); |
| 812 | ReadSettingGlobal(Settings::values.accelerate_astc, QStringLiteral("accelerate_astc"), true); | ||
| 812 | ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true); | 813 | ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true); |
| 813 | ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"), | 814 | ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"), |
| 814 | false); | 815 | false); |
| @@ -1392,6 +1393,7 @@ void Config::SaveRendererValues() { | |||
| 1392 | Settings::values.use_asynchronous_gpu_emulation, true); | 1393 | Settings::values.use_asynchronous_gpu_emulation, true); |
| 1393 | WriteSettingGlobal(QStringLiteral("use_nvdec_emulation"), Settings::values.use_nvdec_emulation, | 1394 | WriteSettingGlobal(QStringLiteral("use_nvdec_emulation"), Settings::values.use_nvdec_emulation, |
| 1394 | true); | 1395 | true); |
| 1396 | WriteSettingGlobal(QStringLiteral("accelerate_astc"), Settings::values.accelerate_astc, true); | ||
| 1395 | WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); | 1397 | WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); |
| 1396 | WriteSettingGlobal(QStringLiteral("use_assembly_shaders"), | 1398 | WriteSettingGlobal(QStringLiteral("use_assembly_shaders"), |
| 1397 | Settings::values.use_assembly_shaders, false); | 1399 | Settings::values.use_assembly_shaders, false); |
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index fb9ec093c..41a69d9b8 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp | |||
| @@ -70,10 +70,12 @@ void ConfigureGraphics::SetConfiguration() { | |||
| 70 | ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); | 70 | ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); |
| 71 | ui->use_disk_shader_cache->setEnabled(runtime_lock); | 71 | ui->use_disk_shader_cache->setEnabled(runtime_lock); |
| 72 | ui->use_nvdec_emulation->setEnabled(runtime_lock); | 72 | ui->use_nvdec_emulation->setEnabled(runtime_lock); |
| 73 | ui->accelerate_astc->setEnabled(runtime_lock); | ||
| 73 | ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue()); | 74 | ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue()); |
| 74 | ui->use_asynchronous_gpu_emulation->setChecked( | 75 | ui->use_asynchronous_gpu_emulation->setChecked( |
| 75 | Settings::values.use_asynchronous_gpu_emulation.GetValue()); | 76 | Settings::values.use_asynchronous_gpu_emulation.GetValue()); |
| 76 | ui->use_nvdec_emulation->setChecked(Settings::values.use_nvdec_emulation.GetValue()); | 77 | ui->use_nvdec_emulation->setChecked(Settings::values.use_nvdec_emulation.GetValue()); |
| 78 | ui->accelerate_astc->setChecked(Settings::values.accelerate_astc.GetValue()); | ||
| 77 | 79 | ||
| 78 | if (Settings::IsConfiguringGlobal()) { | 80 | if (Settings::IsConfiguringGlobal()) { |
| 79 | ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend.GetValue())); | 81 | ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend.GetValue())); |
| @@ -118,6 +120,8 @@ void ConfigureGraphics::ApplyConfiguration() { | |||
| 118 | use_asynchronous_gpu_emulation); | 120 | use_asynchronous_gpu_emulation); |
| 119 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_nvdec_emulation, | 121 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_nvdec_emulation, |
| 120 | ui->use_nvdec_emulation, use_nvdec_emulation); | 122 | ui->use_nvdec_emulation, use_nvdec_emulation); |
| 123 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.accelerate_astc, ui->accelerate_astc, | ||
| 124 | accelerate_astc); | ||
| 121 | 125 | ||
| 122 | if (Settings::IsConfiguringGlobal()) { | 126 | if (Settings::IsConfiguringGlobal()) { |
| 123 | // Guard if during game and set to game-specific value | 127 | // Guard if during game and set to game-specific value |
| @@ -254,6 +258,7 @@ void ConfigureGraphics::SetupPerGameUI() { | |||
| 254 | ui->use_asynchronous_gpu_emulation->setEnabled( | 258 | ui->use_asynchronous_gpu_emulation->setEnabled( |
| 255 | Settings::values.use_asynchronous_gpu_emulation.UsingGlobal()); | 259 | Settings::values.use_asynchronous_gpu_emulation.UsingGlobal()); |
| 256 | ui->use_nvdec_emulation->setEnabled(Settings::values.use_nvdec_emulation.UsingGlobal()); | 260 | ui->use_nvdec_emulation->setEnabled(Settings::values.use_nvdec_emulation.UsingGlobal()); |
| 261 | ui->accelerate_astc->setEnabled(Settings::values.accelerate_astc.UsingGlobal()); | ||
| 257 | ui->use_disk_shader_cache->setEnabled(Settings::values.use_disk_shader_cache.UsingGlobal()); | 262 | ui->use_disk_shader_cache->setEnabled(Settings::values.use_disk_shader_cache.UsingGlobal()); |
| 258 | ui->bg_button->setEnabled(Settings::values.bg_red.UsingGlobal()); | 263 | ui->bg_button->setEnabled(Settings::values.bg_red.UsingGlobal()); |
| 259 | 264 | ||
| @@ -269,6 +274,8 @@ void ConfigureGraphics::SetupPerGameUI() { | |||
| 269 | ui->use_disk_shader_cache, Settings::values.use_disk_shader_cache, use_disk_shader_cache); | 274 | ui->use_disk_shader_cache, Settings::values.use_disk_shader_cache, use_disk_shader_cache); |
| 270 | ConfigurationShared::SetColoredTristate( | 275 | ConfigurationShared::SetColoredTristate( |
| 271 | ui->use_nvdec_emulation, Settings::values.use_nvdec_emulation, use_nvdec_emulation); | 276 | ui->use_nvdec_emulation, Settings::values.use_nvdec_emulation, use_nvdec_emulation); |
| 277 | ConfigurationShared::SetColoredTristate(ui->accelerate_astc, Settings::values.accelerate_astc, | ||
| 278 | accelerate_astc); | ||
| 272 | ConfigurationShared::SetColoredTristate(ui->use_asynchronous_gpu_emulation, | 279 | ConfigurationShared::SetColoredTristate(ui->use_asynchronous_gpu_emulation, |
| 273 | Settings::values.use_asynchronous_gpu_emulation, | 280 | Settings::values.use_asynchronous_gpu_emulation, |
| 274 | use_asynchronous_gpu_emulation); | 281 | use_asynchronous_gpu_emulation); |
diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h index c162048a2..6418115cf 100644 --- a/src/yuzu/configuration/configure_graphics.h +++ b/src/yuzu/configuration/configure_graphics.h | |||
| @@ -47,6 +47,7 @@ private: | |||
| 47 | QColor bg_color; | 47 | QColor bg_color; |
| 48 | 48 | ||
| 49 | ConfigurationShared::CheckState use_nvdec_emulation; | 49 | ConfigurationShared::CheckState use_nvdec_emulation; |
| 50 | ConfigurationShared::CheckState accelerate_astc; | ||
| 50 | ConfigurationShared::CheckState use_disk_shader_cache; | 51 | ConfigurationShared::CheckState use_disk_shader_cache; |
| 51 | ConfigurationShared::CheckState use_asynchronous_gpu_emulation; | 52 | ConfigurationShared::CheckState use_asynchronous_gpu_emulation; |
| 52 | 53 | ||
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index ab0bd4d77..5b999d84d 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui | |||
| @@ -105,6 +105,13 @@ | |||
| 105 | </widget> | 105 | </widget> |
| 106 | </item> | 106 | </item> |
| 107 | <item> | 107 | <item> |
| 108 | <widget class="QCheckBox" name="accelerate_astc"> | ||
| 109 | <property name="text"> | ||
| 110 | <string>Accelerate ASTC texture decoding</string> | ||
| 111 | </property> | ||
| 112 | </widget> | ||
| 113 | </item> | ||
| 114 | <item> | ||
| 108 | <widget class="QWidget" name="fullscreen_mode_layout" native="true"> | 115 | <widget class="QWidget" name="fullscreen_mode_layout" native="true"> |
| 109 | <layout class="QHBoxLayout" name="horizontalLayout_1"> | 116 | <layout class="QHBoxLayout" name="horizontalLayout_1"> |
| 110 | <property name="leftMargin"> | 117 | <property name="leftMargin"> |
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 107f097d0..621b31571 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp | |||
| @@ -447,8 +447,10 @@ void Config::ReadValues() { | |||
| 447 | sdl2_config->GetBoolean("Renderer", "use_assembly_shaders", true)); | 447 | sdl2_config->GetBoolean("Renderer", "use_assembly_shaders", true)); |
| 448 | Settings::values.use_asynchronous_shaders.SetValue( | 448 | Settings::values.use_asynchronous_shaders.SetValue( |
| 449 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_shaders", false)); | 449 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_shaders", false)); |
| 450 | Settings::values.use_asynchronous_shaders.SetValue( | 450 | Settings::values.use_nvdec_emulation.SetValue( |
| 451 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_shaders", false)); | 451 | sdl2_config->GetBoolean("Renderer", "use_nvdec_emulation", true)); |
| 452 | Settings::values.accelerate_astc.SetValue( | ||
| 453 | sdl2_config->GetBoolean("Renderer", "accelerate_astc", true)); | ||
| 452 | Settings::values.use_fast_gpu_time.SetValue( | 454 | Settings::values.use_fast_gpu_time.SetValue( |
| 453 | sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true)); | 455 | sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true)); |
| 454 | 456 | ||
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index c32421671..efa1b1d18 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h | |||
| @@ -194,6 +194,14 @@ use_assembly_shaders = | |||
| 194 | # 0 (default): Off, 1: On | 194 | # 0 (default): Off, 1: On |
| 195 | use_asynchronous_shaders = | 195 | use_asynchronous_shaders = |
| 196 | 196 | ||
| 197 | # Enable NVDEC emulation. | ||
| 198 | # 0: Off, 1 (default): On | ||
| 199 | use_nvdec_emulation = | ||
| 200 | |||
| 201 | # Accelerate ASTC texture decoding. | ||
| 202 | # 0: Off, 1 (default): On | ||
| 203 | accelerate_astc = | ||
| 204 | |||
| 197 | # Turns on the frame limiter, which will limit frames output to the target game speed | 205 | # Turns on the frame limiter, which will limit frames output to the target game speed |
| 198 | # 0: Off, 1: On (default) | 206 | # 0: Off, 1: On (default) |
| 199 | use_frame_limit = | 207 | use_frame_limit = |