summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Ameer J, 2023-07-30 12:26:48 -0400
committer: Ameer J, 2023-08-06 14:54:57 -0400
commit ac09cc3504fd9c2e256377f75e7ecb187c4bb6f7 (patch)
tree 1a64799c9f35653539cd5a51ca4e001dd51a0244 /src
parent: params.max_weight (diff)
download: yuzu-ac09cc3504fd9c2e256377f75e7ecb187c4bb6f7.tar.gz
yuzu-ac09cc3504fd9c2e256377f75e7ecb187c4bb6f7.tar.xz
yuzu-ac09cc3504fd9c2e256377f75e7ecb187c4bb6f7.zip
weights refactor
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/host_shaders/astc_decoder.comp | 48
1 file changed, 22 insertions, 26 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index a814ef483..b84ddd67d 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -116,8 +116,6 @@ const uint REPLICATE_7_BIT_TO_8_TABLE[128] =
116 237, 239, 241, 243, 245, 247, 249, 251, 253, 255); 116 237, 239, 241, 243, 245, 247, 249, 251, 253, 255);
117 117
118// Input ASTC texture globals 118// Input ASTC texture globals
119uint current_index = 0;
120int bitsread = 0;
121int total_bitsread = 0; 119int total_bitsread = 0;
122uvec4 local_buff; 120uvec4 local_buff;
123 121
@@ -144,13 +142,6 @@ int texel_vector_index = 0;
144 142
145uint unquantized_texel_weights[2][144]; 143uint unquantized_texel_weights[2][144];
146 144
147uint SwizzleOffset(uvec2 pos) {
148 uint x = pos.x;
149 uint y = pos.y;
150 return ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 +
151 (y % 2) * 16 + (x % 16);
152}
153
154// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] 145// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
155// is the same as [(num_bits - 1):0] and repeats all the way down. 146// is the same as [(num_bits - 1):0] and repeats all the way down.
156uint Replicate(uint val, uint num_bits, uint to_bit) { 147uint Replicate(uint val, uint num_bits, uint to_bit) {
@@ -1224,33 +1215,40 @@ void DecompressBlock(ivec3 coord) {
1224 uint local_partition = 0; 1215 uint local_partition = 0;
1225 if (num_partitions > 1) { 1216 if (num_partitions > 1) {
1226 local_partition = Select2DPartition(partition_index, i, j, num_partitions, 1217 local_partition = Select2DPartition(partition_index, i, j, num_partitions,
1227 (block_dims.y * block_dims.x) < 32); 1218 (block_dims.y * block_dims.x) < 32);
1228 } 1219 }
1229 vec4 p; 1220 const uvec4 C0 = ReplicateByteTo16(endpoints[local_partition][0]);
1230 uvec4 C0 = ReplicateByteTo16(endpoints[local_partition][0]); 1221 const uvec4 C1 = ReplicateByteTo16(endpoints[local_partition][1]);
1231 uvec4 C1 = ReplicateByteTo16(endpoints[local_partition][1]); 1222 const uint weight_offset = (j * block_dims.x + i);
1232 uvec4 plane_vec = uvec4(0); 1223 const uint primary_weight = unquantized_texel_weights[weight_offset][0];
1233 uvec4 weight_vec = uvec4(0); 1224 uvec4 weight_vec = uvec4(primary_weight);
1234 for (uint c = 0; c < 4; c++) { 1225 if (params.dual_plane) {
1235 if (params.dual_plane && (((plane_index + 1) & 3) == c)) { 1226 const uint secondary_weight = unquantized_texel_weights[weight_offset][1];
1236 plane_vec[c] = 1; 1227 for (uint c = 0; c < 4; c++) {
1228 const bool is_secondary = ((plane_index + 1u) & 3u) == c;
1229 weight_vec[c] = is_secondary ? secondary_weight : primary_weight;
1237 } 1230 }
1238 weight_vec[c] = unquantized_texel_weights[plane_vec[c]][j * block_dims.x + i];
1239 } 1231 }
1240 vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) / 64); 1232 const vec4 Cf =
1241 p = (Cf / 65535.0); 1233 vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) / 64);
1234 const vec4 p = (Cf / 65535.0);
1242 imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar); 1235 imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar);
1243 } 1236 }
1244 } 1237 }
1245} 1238}
1246 1239
1240
1241uint SwizzleOffset(uvec2 pos) {
1242 uint x = pos.x;
1243 uint y = pos.y;
1244 return ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 +
1245 (y % 2) * 16 + (x % 16);
1246}
1247
1247void main() { 1248void main() {
1248 uvec3 pos = gl_GlobalInvocationID; 1249 uvec3 pos = gl_GlobalInvocationID;
1249 pos.x <<= BYTES_PER_BLOCK_LOG2; 1250 pos.x <<= BYTES_PER_BLOCK_LOG2;
1250
1251 // Read as soon as possible due to its latency
1252 const uint swizzle = SwizzleOffset(pos.xy); 1251 const uint swizzle = SwizzleOffset(pos.xy);
1253
1254 const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; 1252 const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT;
1255 1253
1256 uint offset = 0; 1254 uint offset = 0;
@@ -1264,8 +1262,6 @@ void main() {
1264 if (any(greaterThanEqual(coord, imageSize(dest_image)))) { 1262 if (any(greaterThanEqual(coord, imageSize(dest_image)))) {
1265 return; 1263 return;
1266 } 1264 }
1267 current_index = 0;
1268 bitsread = 0;
1269 local_buff = astc_data[offset / 16]; 1265 local_buff = astc_data[offset / 16];
1270 DecompressBlock(coord); 1266 DecompressBlock(coord);
1271} 1267}