reuse vectors memory

author: Ameer J 2023-07-30 13:04:41 -0400
committer: Ameer J 2023-08-06 14:54:57 -0400
commit: 0078e5a33822d0e15cc7fab2809e5bc4883cff26 (patch)
tree: f487d8954b16e585be0c3d5c3560d73f93f00bf6 /src
parent: EncodingData pack (diff)
download: yuzu-0078e5a33822d0e15cc7fab2809e5bc4883cff26.tar.gz
yuzu-0078e5a33822d0e15cc7fab2809e5bc4883cff26.tar.xz
yuzu-0078e5a33822d0e15cc7fab2809e5bc4883cff26.zip
1 files changed, 17 insertions, 33 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index 37b502324..4277b0756 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -154,19 +154,10 @@ int color_bitsread = 0;
 uint color_values[32];
 int colvals_index = 0;
-// Weight data globals
-uvec4 texel_weight_data;
-int texel_bitsread = 0;
-bool texel_flag = false;
 // Global "vectors" to be pushed into when decoding
 EncodingData result_vector[144];
 int result_index = 0;
-EncodingData texel_vector[144];
-int texel_vector_index = 0;
 // Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
 // is the same as [(num_bits - 1):0] and repeats all the way down.
 uint Replicate(uint val, uint num_bits, uint to_bit) {
@@ -382,26 +373,15 @@ void SkipBits(uint num_bits) {
 }
 uint StreamColorBits(uint num_bits) {
-    uint ret = 0;
+    const int int_bits = int(num_bits);
-    int int_bits = int(num_bits);
+    const uint ret = ExtractBits(color_endpoint_data, color_bitsread, int_bits);
-    if (texel_flag) {
+    color_bitsread += int_bits;
-        ret = ExtractBits(texel_weight_data, texel_bitsread, int_bits);
-        texel_bitsread += int_bits;
-    } else {
-        ret = ExtractBits(color_endpoint_data, color_bitsread, int_bits);
-        color_bitsread += int_bits;
-    }
    return ret;
 }
 void ResultEmplaceBack(EncodingData val) {
-    if (texel_flag) {
+    result_vector[result_index] = val;
-        texel_vector[texel_vector_index] = val;
+    ++result_index;
-        ++texel_vector_index;
-    } else {
-        result_vector[result_index] = val;
-        ++result_index;
-    }
 }
 // Returns the number of bits required to encode n_vals values.
@@ -910,7 +890,7 @@ void UnquantizeTexelWeights(bool is_dual_plane, uvec2 size, out uint unquantized
    const uint loop_count = min(result_index, area * num_planes);
    uint unquantized[2 * 144];
    for (uint itr = 0; itr < loop_count; ++itr) {
-        unquantized[itr] = UnquantizeTexelWeight(texel_vector[itr]);
+        unquantized[itr] = UnquantizeTexelWeight(result_vector[itr]);
    }
    for (uint plane = 0; plane < num_planes; ++plane) {
        for (uint t = 0; t < block_dims.y; t++) {
@@ -1215,22 +1195,26 @@ void DecompressBlock(ivec3 coord) {
        ComputeEndpoints(endpoints[i][0], endpoints[i][1], color_endpoint_mode[i]);
    }
-    texel_weight_data = local_buff;
+    color_endpoint_data = local_buff;
-    texel_weight_data = bitfieldReverse(texel_weight_data).wzyx;
+    color_endpoint_data = bitfieldReverse(color_endpoint_data).wzyx;
    uint clear_byte_start =
        (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) >> 3) + 1;
-    uint byte_insert = ExtractBits(texel_weight_data, int(clear_byte_start - 1) * 8, 8) &
+    uint byte_insert = ExtractBits(color_endpoint_data, int(clear_byte_start - 1) * 8, 8) &
        uint(
            ((1 << (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) % 8)) - 1));
    uint vec_index = (clear_byte_start - 1) >> 2;
-    texel_weight_data[vec_index] =
+    color_endpoint_data[vec_index] =
-        bitfieldInsert(texel_weight_data[vec_index], byte_insert, int((clear_byte_start - 1) % 4) * 8, 8);
+        bitfieldInsert(color_endpoint_data[vec_index], byte_insert, int((clear_byte_start - 1) % 4) * 8, 8);
    for (uint i = clear_byte_start; i < 16; ++i) {
        uint idx = i >> 2;
-        texel_weight_data[idx] = bitfieldInsert(texel_weight_data[idx], 0, int(i % 4) * 8, 8);
+        color_endpoint_data[idx] = bitfieldInsert(color_endpoint_data[idx], 0, int(i % 4) * 8, 8);
    }
-    texel_flag = true; // use texel "vector" and bit stream in integer decoding
+    // Re-init vector variables for next decode phase
+    result_index = 0;
+    color_bitsread = 0;
    DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane));
    uint unquantized_texel_weights[2 * 144];
author	Ameer J	2023-07-30 13:04:41 -0400
committer	Ameer J	2023-08-06 14:54:57 -0400
commit	0078e5a33822d0e15cc7fab2809e5bc4883cff26 (patch)
tree	f487d8954b16e585be0c3d5c3560d73f93f00bf6 /src
parent	EncodingData pack (diff)
download	yuzu-0078e5a33822d0e15cc7fab2809e5bc4883cff26.tar.gz yuzu-0078e5a33822d0e15cc7fab2809e5bc4883cff26.tar.xz yuzu-0078e5a33822d0e15cc7fab2809e5bc4883cff26.zip

diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index 37b502324..4277b0756 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -154,19 +154,10 @@ int color_bitsread = 0;
154	uint color_values[32];	154	uint color_values[32];
155	int colvals_index = 0;	155	int colvals_index = 0;
156		156
157	// Weight data globals
158	uvec4 texel_weight_data;
159	int texel_bitsread = 0;
160
161	bool texel_flag = false;
162
163	// Global "vectors" to be pushed into when decoding	157	// Global "vectors" to be pushed into when decoding
164	EncodingData result_vector[144];	158	EncodingData result_vector[144];
165	int result_index = 0;	159	int result_index = 0;
166		160
167	EncodingData texel_vector[144];
168	int texel_vector_index = 0;
169
170	// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]	161	// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
171	// is the same as [(num_bits - 1):0] and repeats all the way down.	162	// is the same as [(num_bits - 1):0] and repeats all the way down.
172	uint Replicate(uint val, uint num_bits, uint to_bit) {	163	uint Replicate(uint val, uint num_bits, uint to_bit) {
@@ -382,26 +373,15 @@ void SkipBits(uint num_bits) {
382	}	373	}
383		374
384	uint StreamColorBits(uint num_bits) {	375	uint StreamColorBits(uint num_bits) {
385	uint ret = 0;	376	const int int_bits = int(num_bits);
386	int int_bits = int(num_bits);	377	const uint ret = ExtractBits(color_endpoint_data, color_bitsread, int_bits);
387	if (texel_flag) {	378	color_bitsread += int_bits;
388	ret = ExtractBits(texel_weight_data, texel_bitsread, int_bits);
389	texel_bitsread += int_bits;
390	} else {
391	ret = ExtractBits(color_endpoint_data, color_bitsread, int_bits);
392	color_bitsread += int_bits;
393	}
394	return ret;	379	return ret;
395	}	380	}
396		381
397	void ResultEmplaceBack(EncodingData val) {	382	void ResultEmplaceBack(EncodingData val) {
398	if (texel_flag) {	383	result_vector[result_index] = val;
399	texel_vector[texel_vector_index] = val;	384	++result_index;
400	++texel_vector_index;
401	} else {
402	result_vector[result_index] = val;
403	++result_index;
404	}
405	}	385	}
406		386
407	// Returns the number of bits required to encode n_vals values.	387	// Returns the number of bits required to encode n_vals values.
@@ -910,7 +890,7 @@ void UnquantizeTexelWeights(bool is_dual_plane, uvec2 size, out uint unquantized
910	const uint loop_count = min(result_index, area * num_planes);	890	const uint loop_count = min(result_index, area * num_planes);
911	uint unquantized[2 * 144];	891	uint unquantized[2 * 144];
912	for (uint itr = 0; itr < loop_count; ++itr) {	892	for (uint itr = 0; itr < loop_count; ++itr) {
913	unquantized[itr] = UnquantizeTexelWeight(texel_vector[itr]);	893	unquantized[itr] = UnquantizeTexelWeight(result_vector[itr]);
914	}	894	}
915	for (uint plane = 0; plane < num_planes; ++plane) {	895	for (uint plane = 0; plane < num_planes; ++plane) {
916	for (uint t = 0; t < block_dims.y; t++) {	896	for (uint t = 0; t < block_dims.y; t++) {
@@ -1215,22 +1195,26 @@ void DecompressBlock(ivec3 coord) {
1215	ComputeEndpoints(endpoints[i][0], endpoints[i][1], color_endpoint_mode[i]);	1195	ComputeEndpoints(endpoints[i][0], endpoints[i][1], color_endpoint_mode[i]);
1216	}	1196	}
1217		1197
1218	texel_weight_data = local_buff;	1198	color_endpoint_data = local_buff;
1219	texel_weight_data = bitfieldReverse(texel_weight_data).wzyx;	1199	color_endpoint_data = bitfieldReverse(color_endpoint_data).wzyx;
1220	uint clear_byte_start =	1200	uint clear_byte_start =
1221	(GetPackedBitSize(params.size, params.dual_plane, params.max_weight) >> 3) + 1;	1201	(GetPackedBitSize(params.size, params.dual_plane, params.max_weight) >> 3) + 1;
1222		1202
1223	uint byte_insert = ExtractBits(texel_weight_data, int(clear_byte_start - 1) * 8, 8) &	1203	uint byte_insert = ExtractBits(color_endpoint_data, int(clear_byte_start - 1) * 8, 8) &
1224	uint(	1204	uint(
1225	((1 << (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) % 8)) - 1));	1205	((1 << (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) % 8)) - 1));
1226	uint vec_index = (clear_byte_start - 1) >> 2;	1206	uint vec_index = (clear_byte_start - 1) >> 2;
1227	texel_weight_data[vec_index] =	1207	color_endpoint_data[vec_index] =
1228	bitfieldInsert(texel_weight_data[vec_index], byte_insert, int((clear_byte_start - 1) % 4) * 8, 8);	1208	bitfieldInsert(color_endpoint_data[vec_index], byte_insert, int((clear_byte_start - 1) % 4) * 8, 8);
1229	for (uint i = clear_byte_start; i < 16; ++i) {	1209	for (uint i = clear_byte_start; i < 16; ++i) {
1230	uint idx = i >> 2;	1210	uint idx = i >> 2;
1231	texel_weight_data[idx] = bitfieldInsert(texel_weight_data[idx], 0, int(i % 4) * 8, 8);	1211	color_endpoint_data[idx] = bitfieldInsert(color_endpoint_data[idx], 0, int(i % 4) * 8, 8);
1232	}	1212	}
1233	texel_flag = true; // use texel "vector" and bit stream in integer decoding	1213
		1214	// Re-init vector variables for next decode phase
		1215	result_index = 0;
		1216	color_bitsread = 0;
		1217
1234	DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane));	1218	DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane));
1235		1219
1236	uint unquantized_texel_weights[2 * 144];	1220	uint unquantized_texel_weights[2 * 144];