diff options
| author | 2021-02-13 16:08:50 -0500 | |
|---|---|---|
| committer | 2021-03-13 12:16:03 -0500 | |
| commit | 2f30c105849c214345e2201f4bd6f9b4b76ab4a1 (patch) | |
| tree | 5e5889a44af4194fcf22d1375bdf9f91b5302dc1 /src/video_core/host_shaders | |
| parent | astc_decoder: Fix out of bounds memory access (diff) | |
| download | yuzu-2f30c105849c214345e2201f4bd6f9b4b76ab4a1.tar.gz yuzu-2f30c105849c214345e2201f4bd6f9b4b76ab4a1.tar.xz yuzu-2f30c105849c214345e2201f4bd6f9b4b76ab4a1.zip | |
astc_decoder: Reimplement Layers
Reimplements the approach to decoding layers in the compute shader. Fixes multilayer ASTC decoding when using Vulkan.
Diffstat (limited to 'src/video_core/host_shaders')
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 33 |
1 file changed, 15 insertions, 18 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 5be716309..b903a2d37 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp | |||
| @@ -39,17 +39,15 @@ layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; | |||
| 39 | BEGIN_PUSH_CONSTANTS | 39 | BEGIN_PUSH_CONSTANTS |
| 40 | UNIFORM(0) uvec2 num_image_blocks; | 40 | UNIFORM(0) uvec2 num_image_blocks; |
| 41 | UNIFORM(1) uvec2 block_dims; | 41 | UNIFORM(1) uvec2 block_dims; |
| 42 | UNIFORM(2) uint layer; | ||
| 43 | |||
| 44 | UNIFORM(3) uvec3 origin; | ||
| 45 | UNIFORM(4) ivec3 destination; | ||
| 46 | UNIFORM(5) uint bytes_per_block_log2; | ||
| 47 | UNIFORM(6) uint layer_stride; | ||
| 48 | UNIFORM(7) uint block_size; | ||
| 49 | UNIFORM(8) uint x_shift; | ||
| 50 | UNIFORM(9) uint block_height; | ||
| 51 | UNIFORM(10) uint block_height_mask; | ||
| 52 | 42 | ||
| 43 | UNIFORM(2) uvec3 origin; | ||
| 44 | UNIFORM(3) ivec3 destination; | ||
| 45 | UNIFORM(4) uint bytes_per_block_log2; | ||
| 46 | UNIFORM(5) uint layer_stride; | ||
| 47 | UNIFORM(6) uint block_size; | ||
| 48 | UNIFORM(7) uint x_shift; | ||
| 49 | UNIFORM(8) uint block_height; | ||
| 50 | UNIFORM(9) uint block_height_mask; | ||
| 53 | END_PUSH_CONSTANTS | 51 | END_PUSH_CONSTANTS |
| 54 | 52 | ||
| 55 | uint current_index = 0; | 53 | uint current_index = 0; |
| @@ -82,7 +80,7 @@ layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { | |||
| 82 | uint swizzle_table[]; | 80 | uint swizzle_table[]; |
| 83 | }; | 81 | }; |
| 84 | 82 | ||
| 85 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { | 83 | layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { |
| 86 | uint astc_data[]; | 84 | uint astc_data[]; |
| 87 | }; | 85 | }; |
| 88 | 86 | ||
| @@ -104,7 +102,7 @@ layout(binding = BINDING_BYTE_TO_16_BUFFER, std430) readonly buffer REPLICATE_BY | |||
| 104 | uint REPLICATE_BYTE_TO_16_TABLE[]; | 102 | uint REPLICATE_BYTE_TO_16_TABLE[]; |
| 105 | }; | 103 | }; |
| 106 | 104 | ||
| 107 | layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2D dest_image; | 105 | layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; |
| 108 | 106 | ||
| 109 | const uint GOB_SIZE_X = 64; | 107 | const uint GOB_SIZE_X = 64; |
| 110 | const uint GOB_SIZE_Y = 8; | 108 | const uint GOB_SIZE_Y = 8; |
| @@ -1086,10 +1084,9 @@ TexelWeightParams DecodeBlockInfo(uint block_index) { | |||
| 1086 | void FillError(ivec3 coord) { | 1084 | void FillError(ivec3 coord) { |
| 1087 | for (uint j = 0; j < block_dims.y; j++) { | 1085 | for (uint j = 0; j < block_dims.y; j++) { |
| 1088 | for (uint i = 0; i < block_dims.x; i++) { | 1086 | for (uint i = 0; i < block_dims.x; i++) { |
| 1089 | imageStore(dest_image, coord.xy + ivec2(i, j), vec4(1.0, 1.0, 0.0, 1.0)); | 1087 | imageStore(dest_image, coord + ivec3(i, j, 0), vec4(1.0, 1.0, 0.0, 1.0)); |
| 1090 | } | 1088 | } |
| 1091 | } | 1089 | } |
| 1092 | return; | ||
| 1093 | } | 1090 | } |
| 1094 | 1091 | ||
| 1095 | void FillVoidExtentLDR(ivec3 coord, uint block_index) { | 1092 | void FillVoidExtentLDR(ivec3 coord, uint block_index) { |
| @@ -1107,7 +1104,7 @@ void FillVoidExtentLDR(ivec3 coord, uint block_index) { | |||
| 1107 | float b = float(b_u) / 65535.0f; | 1104 | float b = float(b_u) / 65535.0f; |
| 1108 | for (uint j = 0; j < block_dims.y; j++) { | 1105 | for (uint j = 0; j < block_dims.y; j++) { |
| 1109 | for (uint i = 0; i < block_dims.x; i++) { | 1106 | for (uint i = 0; i < block_dims.x; i++) { |
| 1110 | imageStore(dest_image, coord.xy + ivec2(i, j), vec4(r, g, b, a)); | 1107 | imageStore(dest_image, coord + ivec3(i, j, 0), vec4(r, g, b, a)); |
| 1111 | } | 1108 | } |
| 1112 | } | 1109 | } |
| 1113 | } | 1110 | } |
| @@ -1264,7 +1261,7 @@ void DecompressBlock(ivec3 coord, uint block_index) { | |||
| 1264 | } | 1261 | } |
| 1265 | vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) >> 6); | 1262 | vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) >> 6); |
| 1266 | p = (Cf / 65535.0); | 1263 | p = (Cf / 65535.0); |
| 1267 | imageStore(dest_image, coord.xy + ivec2(i, j), p.gbar); | 1264 | imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar); |
| 1268 | } | 1265 | } |
| 1269 | } | 1266 | } |
| 1270 | } | 1267 | } |
| @@ -1279,7 +1276,7 @@ void main() { | |||
| 1279 | const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; | 1276 | const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; |
| 1280 | 1277 | ||
| 1281 | uint offset = 0; | 1278 | uint offset = 0; |
| 1282 | offset += layer * layer_stride; | 1279 | offset += pos.z * layer_stride; |
| 1283 | offset += (block_y >> block_height) * block_size; | 1280 | offset += (block_y >> block_height) * block_size; |
| 1284 | offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; | 1281 | offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; |
| 1285 | offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; | 1282 | offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; |
| @@ -1287,7 +1284,7 @@ void main() { | |||
| 1287 | 1284 | ||
| 1288 | const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1.0)); | 1285 | const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1.0)); |
| 1289 | uint block_index = | 1286 | uint block_index = |
| 1290 | layer * num_image_blocks.x * num_image_blocks.y + pos.y * num_image_blocks.x + pos.x; | 1287 | pos.z * num_image_blocks.x * num_image_blocks.y + pos.y * num_image_blocks.x + pos.x; |
| 1291 | current_index = 0; | 1288 | current_index = 0; |
| 1292 | bitsread = 0; | 1289 | bitsread = 0; |
| 1293 | for (int i = 0; i < 16; i++) { | 1290 | for (int i = 0; i < 16; i++) { |