summary | refs | log | tree | commit | diff
path: root/src/video_core/host_shaders
diff options
context:
space:
mode:
author: ameerj, 2021-07-31 22:24:15 -0400
committer: ameerj, 2021-08-01 01:22:26 -0400
commit: 5ab80535118e593ef3add3ce2b5935437e1dc1d3 (patch)
tree: a051a40c1ce6bef0d60e1814cc180aa091ffd61f /src/video_core/host_shaders
parent: astc_decoder: Make use of uvec4 for payload data (diff)
download: yuzu-5ab80535118e593ef3add3ce2b5935437e1dc1d3.tar.gz
download: yuzu-5ab80535118e593ef3add3ce2b5935437e1dc1d3.tar.xz
download: yuzu-5ab80535118e593ef3add3ce2b5935437e1dc1d3.zip
astc_decoder: Compute offset swizzles in-shader
Removes the dependency on the swizzle table and on a uniform whose value is constant for all ASTC texture sizes.
Diffstat (limited to 'src/video_core/host_shaders')
-rw-r--r-- src/video_core/host_shaders/astc_decoder.comp | 46
1 file changed, 13 insertions, 33 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index 392f09c68..74ce058a9 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -10,8 +10,7 @@
10#define END_PUSH_CONSTANTS }; 10#define END_PUSH_CONSTANTS };
11#define UNIFORM(n) 11#define UNIFORM(n)
12#define BINDING_INPUT_BUFFER 0 12#define BINDING_INPUT_BUFFER 0
13#define BINDING_SWIZZLE_BUFFER 1 13#define BINDING_OUTPUT_IMAGE 1
14#define BINDING_OUTPUT_IMAGE 2
15 14
16#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv 15#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
17 16
@@ -19,7 +18,6 @@
19#define END_PUSH_CONSTANTS 18#define END_PUSH_CONSTANTS
20#define UNIFORM(n) layout(location = n) uniform 19#define UNIFORM(n) layout(location = n) uniform
21#define BINDING_INPUT_BUFFER 0 20#define BINDING_INPUT_BUFFER 0
22#define BINDING_SWIZZLE_BUFFER 1
23#define BINDING_OUTPUT_IMAGE 0 21#define BINDING_OUTPUT_IMAGE 0
24 22
25#endif 23#endif
@@ -28,13 +26,11 @@ layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
28 26
29BEGIN_PUSH_CONSTANTS 27BEGIN_PUSH_CONSTANTS
30UNIFORM(1) uvec2 block_dims; 28UNIFORM(1) uvec2 block_dims;
31 29UNIFORM(2) uint layer_stride;
32UNIFORM(2) uint bytes_per_block_log2; 30UNIFORM(3) uint block_size;
33UNIFORM(3) uint layer_stride; 31UNIFORM(4) uint x_shift;
34UNIFORM(4) uint block_size; 32UNIFORM(5) uint block_height;
35UNIFORM(5) uint x_shift; 33UNIFORM(6) uint block_height_mask;
36UNIFORM(6) uint block_height;
37UNIFORM(7) uint block_height_mask;
38END_PUSH_CONSTANTS 34END_PUSH_CONSTANTS
39 35
40struct EncodingData { 36struct EncodingData {
@@ -53,35 +49,17 @@ struct TexelWeightParams {
53 bool void_extent_hdr; 49 bool void_extent_hdr;
54}; 50};
55 51
56// Swizzle data
57layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable {
58 uint swizzle_table[];
59};
60
61layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { 52layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 {
62 uvec4 astc_data[]; 53 uvec4 astc_data[];
63}; 54};
64 55
65layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; 56layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image;
66 57
67const uint GOB_SIZE_X = 64;
68const uint GOB_SIZE_Y = 8;
69const uint GOB_SIZE_Z = 1;
70const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
71
72const uint GOB_SIZE_X_SHIFT = 6; 58const uint GOB_SIZE_X_SHIFT = 6;
73const uint GOB_SIZE_Y_SHIFT = 3; 59const uint GOB_SIZE_Y_SHIFT = 3;
74const uint GOB_SIZE_Z_SHIFT = 0; 60const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT;
75const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
76
77const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1);
78
79const int BLOCK_SIZE_IN_BYTES = 16;
80 61
81const int BLOCK_INFO_ERROR = 0; 62const uint BYTES_PER_BLOCK_LOG2 = 4;
82const int BLOCK_INFO_VOID_EXTENT_HDR = 1;
83const int BLOCK_INFO_VOID_EXTENT_LDR = 2;
84const int BLOCK_INFO_NORMAL = 3;
85 63
86const int JUST_BITS = 0; 64const int JUST_BITS = 0;
87const int QUINT = 1; 65const int QUINT = 1;
@@ -168,8 +146,10 @@ int texel_vector_index = 0;
168uint unquantized_texel_weights[2][144]; 146uint unquantized_texel_weights[2][144];
169 147
170uint SwizzleOffset(uvec2 pos) { 148uint SwizzleOffset(uvec2 pos) {
171 pos = pos & SWIZZLE_MASK; 149 uint x = pos.x;
172 return swizzle_table[pos.y * 64 + pos.x]; 150 uint y = pos.y;
151 return ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 +
152 (y % 2) * 16 + (x % 16);
173} 153}
174 154
175// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] 155// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
@@ -1253,7 +1233,7 @@ void DecompressBlock(ivec3 coord) {
1253 1233
1254void main() { 1234void main() {
1255 uvec3 pos = gl_GlobalInvocationID; 1235 uvec3 pos = gl_GlobalInvocationID;
1256 pos.x <<= bytes_per_block_log2; 1236 pos.x <<= BYTES_PER_BLOCK_LOG2;
1257 1237
1258 // Read as soon as possible due to its latency 1238 // Read as soon as possible due to its latency
1259 const uint swizzle = SwizzleOffset(pos.xy); 1239 const uint swizzle = SwizzleOffset(pos.xy);