diff options
| author | 2023-07-30 12:26:48 -0400 | |
|---|---|---|
| committer | 2023-08-06 14:54:57 -0400 | |
| commit | ac09cc3504fd9c2e256377f75e7ecb187c4bb6f7 (patch) | |
| tree | 1a64799c9f35653539cd5a51ca4e001dd51a0244 /src | |
| parent | params.max_weight (diff) | |
| download | yuzu-ac09cc3504fd9c2e256377f75e7ecb187c4bb6f7.tar.gz yuzu-ac09cc3504fd9c2e256377f75e7ecb187c4bb6f7.tar.xz yuzu-ac09cc3504fd9c2e256377f75e7ecb187c4bb6f7.zip | |
weights refactor
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 48 |
1 file changed, 22 insertions, 26 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index a814ef483..b84ddd67d 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp | |||
| @@ -116,8 +116,6 @@ const uint REPLICATE_7_BIT_TO_8_TABLE[128] = | |||
| 116 | 237, 239, 241, 243, 245, 247, 249, 251, 253, 255); | 116 | 237, 239, 241, 243, 245, 247, 249, 251, 253, 255); |
| 117 | 117 | ||
| 118 | // Input ASTC texture globals | 118 | // Input ASTC texture globals |
| 119 | uint current_index = 0; | ||
| 120 | int bitsread = 0; | ||
| 121 | int total_bitsread = 0; | 119 | int total_bitsread = 0; |
| 122 | uvec4 local_buff; | 120 | uvec4 local_buff; |
| 123 | 121 | ||
| @@ -144,13 +142,6 @@ int texel_vector_index = 0; | |||
| 144 | 142 | ||
| 145 | uint unquantized_texel_weights[2][144]; | 143 | uint unquantized_texel_weights[2][144]; |
| 146 | 144 | ||
| 147 | uint SwizzleOffset(uvec2 pos) { | ||
| 148 | uint x = pos.x; | ||
| 149 | uint y = pos.y; | ||
| 150 | return ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 + | ||
| 151 | (y % 2) * 16 + (x % 16); | ||
| 152 | } | ||
| 153 | |||
| 154 | // Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] | 145 | // Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] |
| 155 | // is the same as [(num_bits - 1):0] and repeats all the way down. | 146 | // is the same as [(num_bits - 1):0] and repeats all the way down. |
| 156 | uint Replicate(uint val, uint num_bits, uint to_bit) { | 147 | uint Replicate(uint val, uint num_bits, uint to_bit) { |
| @@ -1224,33 +1215,40 @@ void DecompressBlock(ivec3 coord) { | |||
| 1224 | uint local_partition = 0; | 1215 | uint local_partition = 0; |
| 1225 | if (num_partitions > 1) { | 1216 | if (num_partitions > 1) { |
| 1226 | local_partition = Select2DPartition(partition_index, i, j, num_partitions, | 1217 | local_partition = Select2DPartition(partition_index, i, j, num_partitions, |
| 1227 | (block_dims.y * block_dims.x) < 32); | 1218 | (block_dims.y * block_dims.x) < 32); |
| 1228 | } | 1219 | } |
| 1229 | vec4 p; | 1220 | const uvec4 C0 = ReplicateByteTo16(endpoints[local_partition][0]); |
| 1230 | uvec4 C0 = ReplicateByteTo16(endpoints[local_partition][0]); | 1221 | const uvec4 C1 = ReplicateByteTo16(endpoints[local_partition][1]); |
| 1231 | uvec4 C1 = ReplicateByteTo16(endpoints[local_partition][1]); | 1222 | const uint weight_offset = (j * block_dims.x + i); |
| 1232 | uvec4 plane_vec = uvec4(0); | 1223 | const uint primary_weight = unquantized_texel_weights[weight_offset][0]; |
| 1233 | uvec4 weight_vec = uvec4(0); | 1224 | uvec4 weight_vec = uvec4(primary_weight); |
| 1234 | for (uint c = 0; c < 4; c++) { | 1225 | if (params.dual_plane) { |
| 1235 | if (params.dual_plane && (((plane_index + 1) & 3) == c)) { | 1226 | const uint secondary_weight = unquantized_texel_weights[weight_offset][1]; |
| 1236 | plane_vec[c] = 1; | 1227 | for (uint c = 0; c < 4; c++) { |
| 1228 | const bool is_secondary = ((plane_index + 1u) & 3u) == c; | ||
| 1229 | weight_vec[c] = is_secondary ? secondary_weight : primary_weight; | ||
| 1237 | } | 1230 | } |
| 1238 | weight_vec[c] = unquantized_texel_weights[plane_vec[c]][j * block_dims.x + i]; | ||
| 1239 | } | 1231 | } |
| 1240 | vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) / 64); | 1232 | const vec4 Cf = |
| 1241 | p = (Cf / 65535.0); | 1233 | vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) / 64); |
| 1234 | const vec4 p = (Cf / 65535.0); | ||
| 1242 | imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar); | 1235 | imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar); |
| 1243 | } | 1236 | } |
| 1244 | } | 1237 | } |
| 1245 | } | 1238 | } |
| 1246 | 1239 | ||
| 1240 | |||
| 1241 | uint SwizzleOffset(uvec2 pos) { | ||
| 1242 | uint x = pos.x; | ||
| 1243 | uint y = pos.y; | ||
| 1244 | return ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 + | ||
| 1245 | (y % 2) * 16 + (x % 16); | ||
| 1246 | } | ||
| 1247 | |||
| 1247 | void main() { | 1248 | void main() { |
| 1248 | uvec3 pos = gl_GlobalInvocationID; | 1249 | uvec3 pos = gl_GlobalInvocationID; |
| 1249 | pos.x <<= BYTES_PER_BLOCK_LOG2; | 1250 | pos.x <<= BYTES_PER_BLOCK_LOG2; |
| 1250 | |||
| 1251 | // Read as soon as possible due to its latency | ||
| 1252 | const uint swizzle = SwizzleOffset(pos.xy); | 1251 | const uint swizzle = SwizzleOffset(pos.xy); |
| 1253 | |||
| 1254 | const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; | 1252 | const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; |
| 1255 | 1253 | ||
| 1256 | uint offset = 0; | 1254 | uint offset = 0; |
| @@ -1264,8 +1262,6 @@ void main() { | |||
| 1264 | if (any(greaterThanEqual(coord, imageSize(dest_image)))) { | 1262 | if (any(greaterThanEqual(coord, imageSize(dest_image)))) { |
| 1265 | return; | 1263 | return; |
| 1266 | } | 1264 | } |
| 1267 | current_index = 0; | ||
| 1268 | bitsread = 0; | ||
| 1269 | local_buff = astc_data[offset / 16]; | 1265 | local_buff = astc_data[offset / 16]; |
| 1270 | DecompressBlock(coord); | 1266 | DecompressBlock(coord); |
| 1271 | } | 1267 | } |