diff options
| author | 2021-06-19 00:55:13 -0400 | |
|---|---|---|
| committer | 2021-06-19 09:00:33 -0400 | |
| commit | 31b125ef578dd5df4e289d1057154dd34f73cb19 (patch) | |
| tree | c44e0bfe5d201587da1de1351692617f24769b98 /src/video_core/host_shaders | |
| parent | Merge pull request #6484 from CaptV0rt3x/discord-rpc (diff) | |
| download | yuzu-31b125ef578dd5df4e289d1057154dd34f73cb19.tar.gz yuzu-31b125ef578dd5df4e289d1057154dd34f73cb19.tar.xz yuzu-31b125ef578dd5df4e289d1057154dd34f73cb19.zip | |
astc: Various robustness enhancements for the gpu decoder
These changes should help reduce crashes and driver panics that may
occur due to synchronization issues between shader completion and
later access to the decoded texture.
Diffstat (limited to 'src/video_core/host_shaders')
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 15 |
1 files changed, 6 insertions, 9 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index eaba1b103..71327e233 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp | |||
| @@ -14,9 +14,8 @@ | |||
| 14 | #define BINDING_6_TO_8_BUFFER 2 | 14 | #define BINDING_6_TO_8_BUFFER 2 |
| 15 | #define BINDING_7_TO_8_BUFFER 3 | 15 | #define BINDING_7_TO_8_BUFFER 3 |
| 16 | #define BINDING_8_TO_8_BUFFER 4 | 16 | #define BINDING_8_TO_8_BUFFER 4 |
| 17 | #define BINDING_BYTE_TO_16_BUFFER 5 | 17 | #define BINDING_SWIZZLE_BUFFER 5 |
| 18 | #define BINDING_SWIZZLE_BUFFER 6 | 18 | #define BINDING_OUTPUT_IMAGE 6 |
| 19 | #define BINDING_OUTPUT_IMAGE 7 | ||
| 20 | 19 | ||
| 21 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv | 20 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv |
| 22 | 21 | ||
| @@ -29,7 +28,6 @@ | |||
| 29 | #define BINDING_6_TO_8_BUFFER 3 | 28 | #define BINDING_6_TO_8_BUFFER 3 |
| 30 | #define BINDING_7_TO_8_BUFFER 4 | 29 | #define BINDING_7_TO_8_BUFFER 4 |
| 31 | #define BINDING_8_TO_8_BUFFER 5 | 30 | #define BINDING_8_TO_8_BUFFER 5 |
| 32 | #define BINDING_BYTE_TO_16_BUFFER 6 | ||
| 33 | #define BINDING_OUTPUT_IMAGE 0 | 31 | #define BINDING_OUTPUT_IMAGE 0 |
| 34 | 32 | ||
| 35 | #endif | 33 | #endif |
| @@ -86,9 +84,6 @@ layout(binding = BINDING_7_TO_8_BUFFER, std430) readonly buffer REPLICATE_7_BIT_ | |||
| 86 | layout(binding = BINDING_8_TO_8_BUFFER, std430) readonly buffer REPLICATE_8_BIT_TO_8 { | 84 | layout(binding = BINDING_8_TO_8_BUFFER, std430) readonly buffer REPLICATE_8_BIT_TO_8 { |
| 87 | uint REPLICATE_8_BIT_TO_8_TABLE[]; | 85 | uint REPLICATE_8_BIT_TO_8_TABLE[]; |
| 88 | }; | 86 | }; |
| 89 | layout(binding = BINDING_BYTE_TO_16_BUFFER, std430) readonly buffer REPLICATE_BYTE_TO_16 { | ||
| 90 | uint REPLICATE_BYTE_TO_16_TABLE[]; | ||
| 91 | }; | ||
| 92 | 87 | ||
| 93 | layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; | 88 | layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; |
| 94 | 89 | ||
| @@ -207,8 +202,7 @@ uint Replicate(uint val, uint num_bits, uint to_bit) { | |||
| 207 | } | 202 | } |
| 208 | 203 | ||
| 209 | uvec4 ReplicateByteTo16(uvec4 value) { | 204 | uvec4 ReplicateByteTo16(uvec4 value) { |
| 210 | return uvec4(REPLICATE_BYTE_TO_16_TABLE[value.x], REPLICATE_BYTE_TO_16_TABLE[value.y], | 205 | return value * 0x101; |
| 211 | REPLICATE_BYTE_TO_16_TABLE[value.z], REPLICATE_BYTE_TO_16_TABLE[value.w]); | ||
| 212 | } | 206 | } |
| 213 | 207 | ||
| 214 | uint ReplicateBitTo7(uint value) { | 208 | uint ReplicateBitTo7(uint value) { |
| @@ -1327,6 +1321,9 @@ void main() { | |||
| 1327 | offset += swizzle; | 1321 | offset += swizzle; |
| 1328 | 1322 | ||
| 1329 | const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1)); | 1323 | const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1)); |
| 1324 | if (any(greaterThanEqual(coord, imageSize(dest_image)))) { | ||
| 1325 | return; | ||
| 1326 | } | ||
| 1330 | uint block_index = | 1327 | uint block_index = |
| 1331 | pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x; | 1328 | pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x; |
| 1332 | 1329 | ||