summary | refs | log | tree | commit | diff
path: root/src/video_core/host_shaders
diff options
context:
space:
mode:
author: Rodrigo Locatti, 2021-02-13 16:08:50 -0500
committer: ameerj, 2021-03-13 12:16:03 -0500
commit2f30c105849c214345e2201f4bd6f9b4b76ab4a1 (patch)
tree5e5889a44af4194fcf22d1375bdf9f91b5302dc1 /src/video_core/host_shaders
parentastc_decoder: Fix out of bounds memory access (diff)
downloadyuzu-2f30c105849c214345e2201f4bd6f9b4b76ab4a1.tar.gz
yuzu-2f30c105849c214345e2201f4bd6f9b4b76ab4a1.tar.xz
yuzu-2f30c105849c214345e2201f4bd6f9b4b76ab4a1.zip
astc_decoder: Reimplement Layers
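Reimplements how layers are decoded in the compute shader: the `layer` push constant is removed, the layer is instead read from `pos.z`, and the destination image becomes an `image2DArray` that is written with three-component coordinates. Fixes multilayer ASTC decoding when using Vulkan.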
Diffstat (limited to 'src/video_core/host_shaders')
-rw-r--r-- src/video_core/host_shaders/astc_decoder.comp | 33
1 files changed, 15 insertions, 18 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index 5be716309..b903a2d37 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -39,17 +39,15 @@ layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
39BEGIN_PUSH_CONSTANTS 39BEGIN_PUSH_CONSTANTS
40UNIFORM(0) uvec2 num_image_blocks; 40UNIFORM(0) uvec2 num_image_blocks;
41UNIFORM(1) uvec2 block_dims; 41UNIFORM(1) uvec2 block_dims;
42UNIFORM(2) uint layer;
43
44UNIFORM(3) uvec3 origin;
45UNIFORM(4) ivec3 destination;
46UNIFORM(5) uint bytes_per_block_log2;
47UNIFORM(6) uint layer_stride;
48UNIFORM(7) uint block_size;
49UNIFORM(8) uint x_shift;
50UNIFORM(9) uint block_height;
51UNIFORM(10) uint block_height_mask;
52 42
43UNIFORM(2) uvec3 origin;
44UNIFORM(3) ivec3 destination;
45UNIFORM(4) uint bytes_per_block_log2;
46UNIFORM(5) uint layer_stride;
47UNIFORM(6) uint block_size;
48UNIFORM(7) uint x_shift;
49UNIFORM(8) uint block_height;
50UNIFORM(9) uint block_height_mask;
53END_PUSH_CONSTANTS 51END_PUSH_CONSTANTS
54 52
55uint current_index = 0; 53uint current_index = 0;
@@ -82,7 +80,7 @@ layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable {
82 uint swizzle_table[]; 80 uint swizzle_table[];
83}; 81};
84 82
85layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { 83layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 {
86 uint astc_data[]; 84 uint astc_data[];
87}; 85};
88 86
@@ -104,7 +102,7 @@ layout(binding = BINDING_BYTE_TO_16_BUFFER, std430) readonly buffer REPLICATE_BY
104 uint REPLICATE_BYTE_TO_16_TABLE[]; 102 uint REPLICATE_BYTE_TO_16_TABLE[];
105}; 103};
106 104
107layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2D dest_image; 105layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image;
108 106
109const uint GOB_SIZE_X = 64; 107const uint GOB_SIZE_X = 64;
110const uint GOB_SIZE_Y = 8; 108const uint GOB_SIZE_Y = 8;
@@ -1086,10 +1084,9 @@ TexelWeightParams DecodeBlockInfo(uint block_index) {
1086void FillError(ivec3 coord) { 1084void FillError(ivec3 coord) {
1087 for (uint j = 0; j < block_dims.y; j++) { 1085 for (uint j = 0; j < block_dims.y; j++) {
1088 for (uint i = 0; i < block_dims.x; i++) { 1086 for (uint i = 0; i < block_dims.x; i++) {
1089 imageStore(dest_image, coord.xy + ivec2(i, j), vec4(1.0, 1.0, 0.0, 1.0)); 1087 imageStore(dest_image, coord + ivec3(i, j, 0), vec4(1.0, 1.0, 0.0, 1.0));
1090 } 1088 }
1091 } 1089 }
1092 return;
1093} 1090}
1094 1091
1095void FillVoidExtentLDR(ivec3 coord, uint block_index) { 1092void FillVoidExtentLDR(ivec3 coord, uint block_index) {
@@ -1107,7 +1104,7 @@ void FillVoidExtentLDR(ivec3 coord, uint block_index) {
1107 float b = float(b_u) / 65535.0f; 1104 float b = float(b_u) / 65535.0f;
1108 for (uint j = 0; j < block_dims.y; j++) { 1105 for (uint j = 0; j < block_dims.y; j++) {
1109 for (uint i = 0; i < block_dims.x; i++) { 1106 for (uint i = 0; i < block_dims.x; i++) {
1110 imageStore(dest_image, coord.xy + ivec2(i, j), vec4(r, g, b, a)); 1107 imageStore(dest_image, coord + ivec3(i, j, 0), vec4(r, g, b, a));
1111 } 1108 }
1112 } 1109 }
1113} 1110}
@@ -1264,7 +1261,7 @@ void DecompressBlock(ivec3 coord, uint block_index) {
1264 } 1261 }
1265 vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) >> 6); 1262 vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) >> 6);
1266 p = (Cf / 65535.0); 1263 p = (Cf / 65535.0);
1267 imageStore(dest_image, coord.xy + ivec2(i, j), p.gbar); 1264 imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar);
1268 } 1265 }
1269 } 1266 }
1270} 1267}
@@ -1279,7 +1276,7 @@ void main() {
1279 const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; 1276 const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT;
1280 1277
1281 uint offset = 0; 1278 uint offset = 0;
1282 offset += layer * layer_stride; 1279 offset += pos.z * layer_stride;
1283 offset += (block_y >> block_height) * block_size; 1280 offset += (block_y >> block_height) * block_size;
1284 offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; 1281 offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT;
1285 offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; 1282 offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift;
@@ -1287,7 +1284,7 @@ void main() {
1287 1284
1288 const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1.0)); 1285 const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1.0));
1289 uint block_index = 1286 uint block_index =
1290 layer * num_image_blocks.x * num_image_blocks.y + pos.y * num_image_blocks.x + pos.x; 1287 pos.z * num_image_blocks.x * num_image_blocks.y + pos.y * num_image_blocks.x + pos.x;
1291 current_index = 0; 1288 current_index = 0;
1292 bitsread = 0; 1289 bitsread = 0;
1293 for (int i = 0; i < 16; i++) { 1290 for (int i = 0; i < 16; i++) {