3 files changed, 43 insertions, 36 deletions
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index acd5bdd78..3625b666c 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -42,21 +42,24 @@ constexpr u32 Popcnt(u32 n) {
 class InputBitStream {
 public:
-    constexpr explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0)
+    constexpr explicit InputBitStream(std::span<const u8> data, size_t start_offset = 0)
-        : cur_byte{ptr}, next_bit{start_offset % 8} {}
+        : cur_byte{data.data()}, total_bits{data.size()}, next_bit{start_offset % 8} {}
-    constexpr std::size_t GetBitsRead() const {
+    constexpr size_t GetBitsRead() const {
        return bits_read;
    }
    constexpr bool ReadBit() {
-        const bool bit = (*cur_byte >> next_bit++) & 1;
+        if (bits_read >= total_bits * 8) {
+            return 0;
+        }
+        const bool bit = ((*cur_byte >> next_bit) & 1) != 0;
+        ++next_bit;
        while (next_bit >= 8) {
            next_bit -= 8;
-            cur_byte++;
+            ++cur_byte;
        }
+        ++bits_read;
-        bits_read++;
        return bit;
    }
@@ -79,8 +82,9 @@ public:
 private:
    const u8* cur_byte;
-    std::size_t next_bit = 0;
+    size_t total_bits = 0;
-    std::size_t bits_read = 0;
+    size_t next_bit = 0;
+    size_t bits_read = 0;
 };
 class OutputBitStream {
@@ -193,15 +197,15 @@ struct IntegerEncodedValue {
    };
 };
 using IntegerEncodedVector = boost::container::static_vector<
-    IntegerEncodedValue, 64,
+    IntegerEncodedValue, 256,
    boost::container::static_vector_options<
        boost::container::inplace_alignment<alignof(IntegerEncodedValue)>,
        boost::container::throw_on_overflow<false>>::type>;
 static void DecodeTritBlock(InputBitStream& bits, IntegerEncodedVector& result, u32 nBitsPerValue) {
    // Implement the algorithm in section C.2.12
-    u32 m[5];
+    std::array<u32, 5> m;
-    u32 t[5];
+    std::array<u32, 5> t;
    u32 T;
    // Read the trit encoded block according to
@@ -866,7 +870,7 @@ public:
    }
 };
-static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nPartitions,
+static void DecodeColorValues(u32* out, std::span<u8> data, const u32* modes, const u32 nPartitions,
                              const u32 nBitsForColorData) {
    // First figure out how many color values we have
    u32 nValues = 0;
@@ -898,7 +902,7 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP
    // We now have enough to decode our integer sequence.
    IntegerEncodedVector decodedColorValues;
-    InputBitStream colorStream(data);
+    InputBitStream colorStream(data, 0);
    DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
    // Once we have the decoded values, we need to dequantize them to the 0-255 range
@@ -1441,7 +1445,7 @@ static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues,
 static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
                            const u32 blockHeight, std::span<u32, 12 * 12> outBuf) {
-    InputBitStream strm(inBuf.data());
+    InputBitStream strm(inBuf);
    TexelWeightParams weightParams = DecodeBlockInfo(strm);
    // Was there an error?
@@ -1619,15 +1623,16 @@ static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
    // Make sure that higher non-texel bits are set to zero
    const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
-    if (clearByteStart > 0) {
+    if (clearByteStart > 0 && clearByteStart <= texelWeightData.size()) {
        texelWeightData[clearByteStart - 1] &=
            static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
+        std::memset(texelWeightData.data() + clearByteStart, 0,
+                    std::min(16U - clearByteStart, 16U));
    }
-    std::memset(texelWeightData.data() + clearByteStart, 0, std::min(16U - clearByteStart, 16U));
    IntegerEncodedVector texelWeightValues;
-    InputBitStream weightStream(texelWeightData.data());
+    InputBitStream weightStream(texelWeightData);
    DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight,
                          weightParams.GetNumWeightValues());
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 70495aba0..37d7b45a3 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -256,7 +256,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
        .shaderTessellationAndGeometryPointSize = false,
        .shaderImageGatherExtended = true,
        .shaderStorageImageExtendedFormats = false,
-        .shaderStorageImageMultisample = true,
+        .shaderStorageImageMultisample = is_shader_storage_image_multisample,
        .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
        .shaderStorageImageWriteWithoutFormat = true,
        .shaderUniformBufferArrayDynamicIndexing = false,
@@ -804,6 +804,7 @@ void Device::SetupFamilies(VkSurfaceKHR surface) {
 void Device::SetupFeatures() {
    const auto supported_features{physical.GetFeatures()};
    is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
+    is_shader_storage_image_multisample = supported_features.shaderStorageImageMultisample;
    is_blit_depth_stencil_supported = TestDepthStencilBlits();
    is_optimal_astc_supported = IsOptimalAstcSupported(supported_features);
 }
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index a973c3ce4..4b66dba7a 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -272,23 +272,24 @@ private:
    bool is_optimal_astc_supported{};       ///< Support for native ASTC.
    bool is_float16_supported{};            ///< Support for float16 arithmetics.
    bool is_warp_potentially_bigger{};      ///< Host warp size can be bigger than guest.
-    bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
+    bool is_formatless_image_load_supported{};  ///< Support for shader image read without format.
-    bool is_blit_depth_stencil_supported{};    ///< Support for blitting from and to depth stencil.
+    bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images.
-    bool nv_viewport_swizzle{};                ///< Support for VK_NV_viewport_swizzle.
+    bool is_blit_depth_stencil_supported{};     ///< Support for blitting from and to depth stencil.
-    bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
+    bool nv_viewport_swizzle{};                 ///< Support for VK_NV_viewport_swizzle.
-    bool ext_index_type_uint8{};               ///< Support for VK_EXT_index_type_uint8.
+    bool khr_uniform_buffer_standard_layout{};  ///< Support for std430 on UBOs.
-    bool ext_sampler_filter_minmax{};          ///< Support for VK_EXT_sampler_filter_minmax.
+    bool ext_index_type_uint8{};                ///< Support for VK_EXT_index_type_uint8.
-    bool ext_depth_range_unrestricted{};       ///< Support for VK_EXT_depth_range_unrestricted.
+    bool ext_sampler_filter_minmax{};           ///< Support for VK_EXT_sampler_filter_minmax.
-    bool ext_shader_viewport_index_layer{};    ///< Support for VK_EXT_shader_viewport_index_layer.
+    bool ext_depth_range_unrestricted{};        ///< Support for VK_EXT_depth_range_unrestricted.
-    bool ext_tooling_info{};                   ///< Support for VK_EXT_tooling_info.
+    bool ext_shader_viewport_index_layer{};     ///< Support for VK_EXT_shader_viewport_index_layer.
-    bool ext_transform_feedback{};             ///< Support for VK_EXT_transform_feedback.
+    bool ext_tooling_info{};                    ///< Support for VK_EXT_tooling_info.
-    bool ext_custom_border_color{};            ///< Support for VK_EXT_custom_border_color.
+    bool ext_transform_feedback{};              ///< Support for VK_EXT_transform_feedback.
-    bool ext_extended_dynamic_state{};         ///< Support for VK_EXT_extended_dynamic_state.
+    bool ext_custom_border_color{};             ///< Support for VK_EXT_custom_border_color.
-    bool ext_robustness2{};                    ///< Support for VK_EXT_robustness2.
+    bool ext_extended_dynamic_state{};          ///< Support for VK_EXT_extended_dynamic_state.
-    bool ext_shader_stencil_export{};          ///< Support for VK_EXT_shader_stencil_export.
+    bool ext_robustness2{};                     ///< Support for VK_EXT_robustness2.
-    bool nv_device_diagnostics_config{};       ///< Support for VK_NV_device_diagnostics_config.
+    bool ext_shader_stencil_export{};           ///< Support for VK_EXT_shader_stencil_export.
-    bool has_renderdoc{};                      ///< Has RenderDoc attached
+    bool nv_device_diagnostics_config{};        ///< Support for VK_NV_device_diagnostics_config.
-    bool has_nsight_graphics{};                ///< Has Nsight Graphics attached
+    bool has_renderdoc{};                       ///< Has RenderDoc attached
+    bool has_nsight_graphics{};                 ///< Has Nsight Graphics attached
    // Asynchronous Graphics Pipeline setting
    bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline

diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index acd5bdd78..3625b666c 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp
@@ -42,21 +42,24 @@ constexpr u32 Popcnt(u32 n) {
42		42
43	class InputBitStream {	43	class InputBitStream {
44	public:	44	public:
45	constexpr explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0)	45	constexpr explicit InputBitStream(std::span<const u8> data, size_t start_offset = 0)
46	: cur_byte{ptr}, next_bit{start_offset % 8} {}	46	: cur_byte{data.data()}, total_bits{data.size()}, next_bit{start_offset % 8} {}
47		47
48	constexpr std::size_t GetBitsRead() const {	48	constexpr size_t GetBitsRead() const {
49	return bits_read;	49	return bits_read;
50	}	50	}
51		51
52	constexpr bool ReadBit() {	52	constexpr bool ReadBit() {
53	const bool bit = (*cur_byte >> next_bit++) & 1;	53	if (bits_read >= total_bits * 8) {
		54	return 0;
		55	}
		56	const bool bit = ((*cur_byte >> next_bit) & 1) != 0;
		57	++next_bit;
54	while (next_bit >= 8) {	58	while (next_bit >= 8) {
55	next_bit -= 8;	59	next_bit -= 8;
56	cur_byte++;	60	++cur_byte;
57	}	61	}
58		62	++bits_read;
59	bits_read++;
60	return bit;	63	return bit;
61	}	64	}
62		65
@@ -79,8 +82,9 @@ public:
79		82
80	private:	83	private:
81	const u8* cur_byte;	84	const u8* cur_byte;
82	std::size_t next_bit = 0;	85	size_t total_bits = 0;
83	std::size_t bits_read = 0;	86	size_t next_bit = 0;
		87	size_t bits_read = 0;
84	};	88	};
85		89
86	class OutputBitStream {	90	class OutputBitStream {
@@ -193,15 +197,15 @@ struct IntegerEncodedValue {
193	};	197	};
194	};	198	};
195	using IntegerEncodedVector = boost::container::static_vector<	199	using IntegerEncodedVector = boost::container::static_vector<
196	IntegerEncodedValue, 64,	200	IntegerEncodedValue, 256,
197	boost::container::static_vector_options<	201	boost::container::static_vector_options<
198	boost::container::inplace_alignment<alignof(IntegerEncodedValue)>,	202	boost::container::inplace_alignment<alignof(IntegerEncodedValue)>,
199	boost::container::throw_on_overflow<false>>::type>;	203	boost::container::throw_on_overflow<false>>::type>;
200		204
201	static void DecodeTritBlock(InputBitStream& bits, IntegerEncodedVector& result, u32 nBitsPerValue) {	205	static void DecodeTritBlock(InputBitStream& bits, IntegerEncodedVector& result, u32 nBitsPerValue) {
202	// Implement the algorithm in section C.2.12	206	// Implement the algorithm in section C.2.12
203	u32 m[5];	207	std::array<u32, 5> m;
204	u32 t[5];	208	std::array<u32, 5> t;
205	u32 T;	209	u32 T;
206		210
207	// Read the trit encoded block according to	211	// Read the trit encoded block according to
@@ -866,7 +870,7 @@ public:
866	}	870	}
867	};	871	};
868		872
869	static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nPartitions,	873	static void DecodeColorValues(u32* out, std::span<u8> data, const u32* modes, const u32 nPartitions,
870	const u32 nBitsForColorData) {	874	const u32 nBitsForColorData) {
871	// First figure out how many color values we have	875	// First figure out how many color values we have
872	u32 nValues = 0;	876	u32 nValues = 0;
@@ -898,7 +902,7 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP
898	// We now have enough to decode our integer sequence.	902	// We now have enough to decode our integer sequence.
899	IntegerEncodedVector decodedColorValues;	903	IntegerEncodedVector decodedColorValues;
900		904
901	InputBitStream colorStream(data);	905	InputBitStream colorStream(data, 0);
902	DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);	906	DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
903		907
904	// Once we have the decoded values, we need to dequantize them to the 0-255 range	908	// Once we have the decoded values, we need to dequantize them to the 0-255 range
@@ -1441,7 +1445,7 @@ static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues,
1441		1445
1442	static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,	1446	static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
1443	const u32 blockHeight, std::span<u32, 12 * 12> outBuf) {	1447	const u32 blockHeight, std::span<u32, 12 * 12> outBuf) {
1444	InputBitStream strm(inBuf.data());	1448	InputBitStream strm(inBuf);
1445	TexelWeightParams weightParams = DecodeBlockInfo(strm);	1449	TexelWeightParams weightParams = DecodeBlockInfo(strm);
1446		1450
1447	// Was there an error?	1451	// Was there an error?
@@ -1619,15 +1623,16 @@ static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
1619		1623
1620	// Make sure that higher non-texel bits are set to zero	1624	// Make sure that higher non-texel bits are set to zero
1621	const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;	1625	const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
1622	if (clearByteStart > 0) {	1626	if (clearByteStart > 0 && clearByteStart <= texelWeightData.size()) {
1623	texelWeightData[clearByteStart - 1] &=	1627	texelWeightData[clearByteStart - 1] &=
1624	static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);	1628	static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
		1629	std::memset(texelWeightData.data() + clearByteStart, 0,
		1630	std::min(16U - clearByteStart, 16U));
1625	}	1631	}
1626	std::memset(texelWeightData.data() + clearByteStart, 0, std::min(16U - clearByteStart, 16U));
1627		1632
1628	IntegerEncodedVector texelWeightValues;	1633	IntegerEncodedVector texelWeightValues;
1629		1634
1630	InputBitStream weightStream(texelWeightData.data());	1635	InputBitStream weightStream(texelWeightData);
1631		1636
1632	DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight,	1637	DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight,
1633	weightParams.GetNumWeightValues());	1638	weightParams.GetNumWeightValues());


diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 70495aba0..37d7b45a3 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -256,7 +256,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
256	.shaderTessellationAndGeometryPointSize = false,	256	.shaderTessellationAndGeometryPointSize = false,
257	.shaderImageGatherExtended = true,	257	.shaderImageGatherExtended = true,
258	.shaderStorageImageExtendedFormats = false,	258	.shaderStorageImageExtendedFormats = false,
259	.shaderStorageImageMultisample = true,	259	.shaderStorageImageMultisample = is_shader_storage_image_multisample,
260	.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,	260	.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
261	.shaderStorageImageWriteWithoutFormat = true,	261	.shaderStorageImageWriteWithoutFormat = true,
262	.shaderUniformBufferArrayDynamicIndexing = false,	262	.shaderUniformBufferArrayDynamicIndexing = false,
@@ -804,6 +804,7 @@ void Device::SetupFamilies(VkSurfaceKHR surface) {
804	void Device::SetupFeatures() {	804	void Device::SetupFeatures() {
805	const auto supported_features{physical.GetFeatures()};	805	const auto supported_features{physical.GetFeatures()};
806	is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;	806	is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
		807	is_shader_storage_image_multisample = supported_features.shaderStorageImageMultisample;
807	is_blit_depth_stencil_supported = TestDepthStencilBlits();	808	is_blit_depth_stencil_supported = TestDepthStencilBlits();
808	is_optimal_astc_supported = IsOptimalAstcSupported(supported_features);	809	is_optimal_astc_supported = IsOptimalAstcSupported(supported_features);
809	}	810	}


diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index a973c3ce4..4b66dba7a 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -272,23 +272,24 @@ private:
272	bool is_optimal_astc_supported{}; ///< Support for native ASTC.	272	bool is_optimal_astc_supported{}; ///< Support for native ASTC.
273	bool is_float16_supported{}; ///< Support for float16 arithmetics.	273	bool is_float16_supported{}; ///< Support for float16 arithmetics.
274	bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.	274	bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
275	bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.	275	bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
276	bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil.	276	bool is_shader_storage_image_multisample{}; ///< Support for image operations on MSAA images.
277	bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.	277	bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil.
278	bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.	278	bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
279	bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.	279	bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
280	bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.	280	bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
281	bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.	281	bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
282	bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.	282	bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
283	bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.	283	bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
284	bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.	284	bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.
285	bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.	285	bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
286	bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.	286	bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
287	bool ext_robustness2{}; ///< Support for VK_EXT_robustness2.	287	bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
288	bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.	288	bool ext_robustness2{}; ///< Support for VK_EXT_robustness2.
289	bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.	289	bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
290	bool has_renderdoc{}; ///< Has RenderDoc attached	290	bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
291	bool has_nsight_graphics{}; ///< Has Nsight Graphics attached	291	bool has_renderdoc{}; ///< Has RenderDoc attached
		292	bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
292		293
293	// Asynchronous Graphics Pipeline setting	294	// Asynchronous Graphics Pipeline setting
294	bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline	295	bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline