diff options
| author | 2023-06-28 01:24:52 -0400 | |
|---|---|---|
| committer | 2023-08-06 14:54:58 -0400 | |
| commit | 5a78b35b1abf071bd62b1ff8d0cb939bd30a549f (patch) | |
| tree | 181d40f05679ed3ff2c9711306151886383f71a6 /src/video_core/host_shaders | |
| parent | small_block opt (diff) | |
| download | yuzu-5a78b35b1abf071bd62b1ff8d0cb939bd30a549f.tar.gz yuzu-5a78b35b1abf071bd62b1ff8d0cb939bd30a549f.tar.xz yuzu-5a78b35b1abf071bd62b1ff8d0cb939bd30a549f.zip | |
vulkan dims specialization
Diffstat (limited to 'src/video_core/host_shaders')
| -rw-r--r-- | src/video_core/host_shaders/CMakeLists.txt | 57 | ||||
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 42 | ||||
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder_spv_includes.h | 20 |
3 files changed, 103 insertions, 16 deletions
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index e61d9af80..20e8388ee 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt | |||
| @@ -13,6 +13,11 @@ set(GLSL_INCLUDES | |||
| 13 | ${FIDELITYFX_FILES} | 13 | ${FIDELITYFX_FILES} |
| 14 | ) | 14 | ) |
| 15 | 15 | ||
| 16 | set(ASTC_INCLUDES | ||
| 17 | # astc_decoder_glsl_includes.h (not listed; presumably because the GLSL specialization in ASTC_GEN is disabled - TODO confirm) | ||
| 18 | astc_decoder_spv_includes.h | ||
| 19 | ) | ||
| 20 | |||
| 16 | set(SHADER_FILES | 21 | set(SHADER_FILES |
| 17 | astc_decoder.comp | 22 | astc_decoder.comp |
| 18 | blit_color_float.frag | 23 | blit_color_float.frag |
| @@ -95,9 +100,60 @@ if (NOT GLSLANG_ERROR STREQUAL "") | |||
| 95 | set(QUIET_FLAG "") | 100 | set(QUIET_FLAG "") |
| 96 | endif() | 101 | endif() |
| 97 | 102 | ||
| 103 | macro(ASTC_GEN) | ||
| 104 | # Index-paired width/height lists covering the 14 valid 2D ASTC block footprints (6x4 is not one of them); one SPIR-V header rule is emitted per pair. | ||
| 105 | set(ASTC_WIDTHS 4 5 5 6 6 8 8 8 10 10 10 10 12 12) | ||
| 106 | set(ASTC_HEIGHTS 4 4 5 5 6 5 6 8 5 6 8 10 10 12) | ||
| 107 | list(LENGTH ASTC_WIDTHS NUM_ASTC_FORMATS) | ||
| 108 | math(EXPR NUM_ASTC_FORMATS "${NUM_ASTC_FORMATS}-1") | ||
| 109 | foreach(i RANGE ${NUM_ASTC_FORMATS}) | ||
| 110 | list(GET ASTC_WIDTHS ${i} ASTC_WIDTH) | ||
| 111 | list(GET ASTC_HEIGHTS ${i} ASTC_HEIGHT) | ||
| 112 | # This is a macro, so SHADER_NAME, SHADER_DIR and SOURCE_FILE are read from the caller and the SHADER_HEADERS update below lands in the caller's scope. | ||
| 113 | # Vulkan SPIR-V Specialization | ||
| 114 | # DEPENDS is used instead of MAIN_DEPENDENCY: a source file may be the main dependency of at most one custom command, and every footprint shares SOURCE_FILE. | ||
| 115 | string(TOUPPER ${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}_SPV SPIRV_VARIABLE_NAME) | ||
| 116 | set(SPIRV_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}_spv.h) | ||
| 117 | add_custom_command( | ||
| 118 | OUTPUT | ||
| 119 | ${SPIRV_HEADER_FILE} | ||
| 120 | COMMAND | ||
| 121 | ${GLSLANGVALIDATOR} -V -DBLOCK_WIDTH=${ASTC_WIDTH} -DBLOCK_HEIGHT=${ASTC_HEIGHT} ${QUIET_FLAG} ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} | ||
| 122 | DEPENDS | ||
| 123 | ${SOURCE_FILE} | ||
| 124 | ) | ||
| 125 | list(APPEND SHADER_HEADERS ${SPIRV_HEADER_FILE}) | ||
| 126 | |||
| 127 | # GLSL Specialization | ||
| 128 | # Disabled as there was no noticeable performance uplift specializing the shaders for OGL | ||
| 129 | |||
| 130 | # set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}.h) | ||
| 131 | # set(SHADER_DEFINES "#define BLOCK_WIDTH ${ASTC_WIDTH}" "#define BLOCK_HEIGHT ${ASTC_HEIGHT}") | ||
| 132 | # set(DEFINES_LINE_NUMBER 14) | ||
| 133 | # string(TOUPPER ${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT} GLSL_VARIABLE_NAME) | ||
| 134 | # add_custom_command( | ||
| 135 | # OUTPUT | ||
| 136 | # ${SOURCE_HEADER_FILE} | ||
| 137 | # COMMAND | ||
| 138 | # ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE} "${SHADER_DEFINES}" ${DEFINES_LINE_NUMBER} ${GLSL_VARIABLE_NAME} | ||
| 139 | # MAIN_DEPENDENCY | ||
| 140 | # ${SOURCE_FILE} | ||
| 141 | # DEPENDS | ||
| 142 | # ${INPUT_FILE} | ||
| 143 | # ${SOURCE_FILE} | ||
| 144 | # ) | ||
| 145 | # set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE}) | ||
| 146 | endforeach() | ||
| 147 | endmacro() | ||
| 148 | |||
| 98 | foreach(FILENAME IN ITEMS ${SHADER_FILES}) | 149 | foreach(FILENAME IN ITEMS ${SHADER_FILES}) |
| 99 | string(REPLACE "." "_" SHADER_NAME ${FILENAME}) | 150 | string(REPLACE "." "_" SHADER_NAME ${FILENAME}) |
| 100 | set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) | 151 | set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) |
| 152 | |||
| 153 | if (${FILENAME} MATCHES "astc_decoder.comp") | ||
| 154 | ASTC_GEN() | ||
| 155 | endif() | ||
| 156 | |||
| 101 | # Skip generating source headers on Vulkan exclusive files | 157 | # Skip generating source headers on Vulkan exclusive files |
| 102 | if (NOT ${FILENAME} MATCHES "vulkan.*") | 158 | if (NOT ${FILENAME} MATCHES "vulkan.*") |
| 103 | set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) | 159 | set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) |
| @@ -151,6 +207,7 @@ endforeach() | |||
| 151 | 207 | ||
| 152 | set(SHADER_SOURCES ${SHADER_FILES}) | 208 | set(SHADER_SOURCES ${SHADER_FILES}) |
| 153 | list(APPEND SHADER_SOURCES ${GLSL_INCLUDES}) | 209 | list(APPEND SHADER_SOURCES ${GLSL_INCLUDES}) |
| 210 | list(APPEND SHADER_SOURCES ${ASTC_INCLUDES}) | ||
| 154 | 211 | ||
| 155 | add_custom_target(host_shaders | 212 | add_custom_target(host_shaders |
| 156 | DEPENDS | 213 | DEPENDS |
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index a33c916ac..b4bb8299f 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp | |||
| @@ -24,7 +24,9 @@ | |||
| 24 | layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; | 24 | layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; |
| 25 | 25 | ||
| 26 | BEGIN_PUSH_CONSTANTS | 26 | BEGIN_PUSH_CONSTANTS |
| 27 | #ifndef BLOCK_WIDTH | ||
| 27 | UNIFORM(1) uvec2 block_dims; | 28 | UNIFORM(1) uvec2 block_dims; |
| 29 | #endif | ||
| 28 | UNIFORM(2) uint layer_stride; | 30 | UNIFORM(2) uint layer_stride; |
| 29 | UNIFORM(3) uint block_size; | 31 | UNIFORM(3) uint block_size; |
| 30 | UNIFORM(4) uint x_shift; | 32 | UNIFORM(4) uint x_shift; |
| @@ -75,7 +77,15 @@ int color_bitsread = 0; | |||
| 75 | // At most will require BLOCK_WIDTH x BLOCK_HEIGHT x 2 in dual plane mode | 77 | // At most will require BLOCK_WIDTH x BLOCK_HEIGHT x 2 in dual plane mode |
| 76 | // So the maximum would be 144 (12 x 12) elements, x 2 for two planes | 78 | // So the maximum would be 144 (12 x 12) elements, x 2 for two planes |
| 77 | #define DIVCEIL(number, divisor) (number + divisor - 1) / divisor | 79 | #define DIVCEIL(number, divisor) (number + divisor - 1) / divisor |
| 80 | // Without -DBLOCK_WIDTH/-DBLOCK_HEIGHT the footprint comes from the block_dims push constant and arrays are sized for the 12x12 worst case; specialized builds size them exactly. | ||
| 81 | #ifndef BLOCK_WIDTH | ||
| 82 | #define BLOCK_WIDTH block_dims.x | ||
| 83 | #define BLOCK_HEIGHT block_dims.y | ||
| 78 | #define ARRAY_NUM_ELEMENTS 144 | 84 | #define ARRAY_NUM_ELEMENTS 144 |
| 85 | #else | ||
| 86 | #define ARRAY_NUM_ELEMENTS (BLOCK_WIDTH * BLOCK_HEIGHT) // parenthesized so the product survives any surrounding operator | ||
| 87 | #endif | ||
| 88 | |||
| 79 | #define VECTOR_ARRAY_SIZE DIVCEIL(ARRAY_NUM_ELEMENTS * 2, 4) | 89 | #define VECTOR_ARRAY_SIZE DIVCEIL(ARRAY_NUM_ELEMENTS * 2, 4) |
| 80 | uvec4 result_vector[VECTOR_ARRAY_SIZE]; | 90 | uvec4 result_vector[VECTOR_ARRAY_SIZE]; |
| 81 | 91 | ||
| @@ -265,7 +275,7 @@ uint Hash52(uint p) { | |||
| 265 | } | 275 | } |
| 266 | 276 | ||
| 267 | uint Select2DPartition(uint seed, uint x, uint y, uint partition_count) { | 277 | uint Select2DPartition(uint seed, uint x, uint y, uint partition_count) { |
| 268 | if ((block_dims.y * block_dims.x) < 32) { | 278 | if ((BLOCK_WIDTH * BLOCK_HEIGHT) < 32) { |
| 269 | x <<= 1; | 279 | x <<= 1; |
| 270 | y <<= 1; | 280 | y <<= 1; |
| 271 | } | 281 | } |
| @@ -878,8 +888,8 @@ uint UnquantizeTexelWeight(EncodingData val) { | |||
| 878 | uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE]; | 888 | uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE]; |
| 879 | 889 | ||
| 880 | void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { | 890 | void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { |
| 881 | const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1)); | 891 | const uint Ds = uint((BLOCK_WIDTH * 0.5f + 1024) / (BLOCK_WIDTH - 1)); |
| 882 | const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1)); | 892 | const uint Dt = uint((BLOCK_HEIGHT * 0.5f + 1024) / (BLOCK_HEIGHT - 1)); |
| 883 | const uint num_planes = is_dual_plane ? 2 : 1; | 893 | const uint num_planes = is_dual_plane ? 2 : 1; |
| 884 | const uint area = size.x * size.y; | 894 | const uint area = size.x * size.y; |
| 885 | const uint loop_count = min(result_index, area * num_planes); | 895 | const uint loop_count = min(result_index, area * num_planes); |
| @@ -890,8 +900,8 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { | |||
| 890 | UnquantizeTexelWeight(GetEncodingFromVector(itr)); | 900 | UnquantizeTexelWeight(GetEncodingFromVector(itr)); |
| 891 | } | 901 | } |
| 892 | for (uint plane = 0; plane < num_planes; ++plane) { | 902 | for (uint plane = 0; plane < num_planes; ++plane) { |
| 893 | for (uint t = 0; t < block_dims.y; t++) { | 903 | for (uint t = 0; t < BLOCK_HEIGHT; t++) { |
| 894 | for (uint s = 0; s < block_dims.x; s++) { | 904 | for (uint s = 0; s < BLOCK_WIDTH; s++) { |
| 895 | const uint cs = Ds * s; | 905 | const uint cs = Ds * s; |
| 896 | const uint ct = Dt * t; | 906 | const uint ct = Dt * t; |
| 897 | const uint gs = (cs * (size.x - 1) + 32) >> 6; | 907 | const uint gs = (cs * (size.x - 1) + 32) >> 6; |
| @@ -934,7 +944,7 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { | |||
| 934 | VectorIndicesFromBase(offset_base); | 944 | VectorIndicesFromBase(offset_base); |
| 935 | p.w = result_vector[array_index][vector_index]; | 945 | p.w = result_vector[array_index][vector_index]; |
| 936 | } | 946 | } |
| 937 | const uint offset = (t * block_dims.x + s) + ARRAY_NUM_ELEMENTS * plane; | 947 | const uint offset = (t * BLOCK_WIDTH + s) + ARRAY_NUM_ELEMENTS * plane; |
| 938 | const uint array_index = offset / 4; | 948 | const uint array_index = offset / 4; |
| 939 | const uint vector_index = offset % 4; | 949 | const uint vector_index = offset % 4; |
| 940 | unquantized_texel_weights[array_index][vector_index] = (uint(dot(p, w)) + 8) >> 4; | 950 | unquantized_texel_weights[array_index][vector_index] = (uint(dot(p, w)) + 8) >> 4; |
| @@ -976,8 +986,8 @@ int FindLayout(uint mode) { | |||
| 976 | 986 | ||
| 977 | 987 | ||
| 978 | void FillError(ivec3 coord) { | 988 | void FillError(ivec3 coord) { |
| 979 | for (uint j = 0; j < block_dims.y; j++) { | 989 | for (uint j = 0; j < BLOCK_HEIGHT; j++) { |
| 980 | for (uint i = 0; i < block_dims.x; i++) { | 990 | for (uint i = 0; i < BLOCK_WIDTH; i++) { |
| 981 | imageStore(dest_image, coord + ivec3(i, j, 0), vec4(0.0, 0.0, 0.0, 0.0)); | 991 | imageStore(dest_image, coord + ivec3(i, j, 0), vec4(0.0, 0.0, 0.0, 0.0)); |
| 982 | } | 992 | } |
| 983 | } | 993 | } |
| @@ -993,8 +1003,8 @@ void FillVoidExtentLDR(ivec3 coord) { | |||
| 993 | const float r = float(r_u) / 65535.0f; | 1003 | const float r = float(r_u) / 65535.0f; |
| 994 | const float g = float(g_u) / 65535.0f; | 1004 | const float g = float(g_u) / 65535.0f; |
| 995 | const float b = float(b_u) / 65535.0f; | 1005 | const float b = float(b_u) / 65535.0f; |
| 996 | for (uint j = 0; j < block_dims.y; j++) { | 1006 | for (uint j = 0; j < BLOCK_HEIGHT; j++) { |
| 997 | for (uint i = 0; i < block_dims.x; i++) { | 1007 | for (uint i = 0; i < BLOCK_WIDTH; i++) { |
| 998 | imageStore(dest_image, coord + ivec3(i, j, 0), vec4(r, g, b, a)); | 1008 | imageStore(dest_image, coord + ivec3(i, j, 0), vec4(r, g, b, a)); |
| 999 | } | 1009 | } |
| 1000 | } | 1010 | } |
| @@ -1089,7 +1099,7 @@ void DecompressBlock(ivec3 coord) { | |||
| 1089 | return; | 1099 | return; |
| 1090 | } | 1100 | } |
| 1091 | const uvec2 size_params = DecodeBlockSize(mode); | 1101 | const uvec2 size_params = DecodeBlockSize(mode); |
| 1092 | if ((size_params.x > block_dims.x) || (size_params.y > block_dims.y)) { | 1102 | if ((size_params.x > BLOCK_WIDTH) || (size_params.y > BLOCK_HEIGHT)) { |
| 1093 | FillError(coord); | 1103 | FillError(coord); |
| 1094 | return; | 1104 | return; |
| 1095 | } | 1105 | } |
| @@ -1218,21 +1228,21 @@ void DecompressBlock(ivec3 coord) { | |||
| 1218 | DecodeIntegerSequence(max_weight, GetNumWeightValues(size_params, dual_plane)); | 1228 | DecodeIntegerSequence(max_weight, GetNumWeightValues(size_params, dual_plane)); |
| 1219 | 1229 | ||
| 1220 | UnquantizeTexelWeights(size_params, dual_plane); | 1230 | UnquantizeTexelWeights(size_params, dual_plane); |
| 1221 | for (uint j = 0; j < block_dims.y; j++) { | 1231 | for (uint j = 0; j < BLOCK_HEIGHT; j++) { |
| 1222 | for (uint i = 0; i < block_dims.x; i++) { | 1232 | for (uint i = 0; i < BLOCK_WIDTH; i++) { |
| 1223 | uint local_partition = 0; | 1233 | uint local_partition = 0; |
| 1224 | if (num_partitions > 1) { | 1234 | if (num_partitions > 1) { |
| 1225 | local_partition = Select2DPartition(partition_index, i, j, num_partitions); | 1235 | local_partition = Select2DPartition(partition_index, i, j, num_partitions); |
| 1226 | } | 1236 | } |
| 1227 | const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]); | 1237 | const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]); |
| 1228 | const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]); | 1238 | const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]); |
| 1229 | const uint weight_offset = (j * block_dims.x + i); | 1239 | const uint weight_offset = (j * BLOCK_WIDTH + i); |
| 1230 | const uint array_index = weight_offset / 4; | 1240 | const uint array_index = weight_offset / 4; |
| 1231 | const uint vector_index = weight_offset % 4; | 1241 | const uint vector_index = weight_offset % 4; |
| 1232 | const uint primary_weight = unquantized_texel_weights[array_index][vector_index]; | 1242 | const uint primary_weight = unquantized_texel_weights[array_index][vector_index]; |
| 1233 | uvec4 weight_vec = uvec4(primary_weight); | 1243 | uvec4 weight_vec = uvec4(primary_weight); |
| 1234 | if (dual_plane) { | 1244 | if (dual_plane) { |
| 1235 | const uint secondary_weight_offset = (j * block_dims.x + i) + ARRAY_NUM_ELEMENTS; | 1245 | const uint secondary_weight_offset = (j * BLOCK_WIDTH + i) + ARRAY_NUM_ELEMENTS; |
| 1236 | const uint secondary_array_index = secondary_weight_offset / 4; | 1246 | const uint secondary_array_index = secondary_weight_offset / 4; |
| 1237 | const uint secondary_vector_index = secondary_weight_offset % 4; | 1247 | const uint secondary_vector_index = secondary_weight_offset % 4; |
| 1238 | const uint secondary_weight = | 1248 | const uint secondary_weight = |
| @@ -1270,7 +1280,7 @@ void main() { | |||
| 1270 | offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; | 1280 | offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; |
| 1271 | offset += swizzle; | 1281 | offset += swizzle; |
| 1272 | 1282 | ||
| 1273 | const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1)); | 1283 | const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(BLOCK_WIDTH, BLOCK_HEIGHT, 1)); |
| 1274 | if (any(greaterThanEqual(coord, imageSize(dest_image)))) { | 1284 | if (any(greaterThanEqual(coord, imageSize(dest_image)))) { |
| 1275 | return; | 1285 | return; |
| 1276 | } | 1286 | } |
diff --git a/src/video_core/host_shaders/astc_decoder_spv_includes.h b/src/video_core/host_shaders/astc_decoder_spv_includes.h new file mode 100644 index 000000000..44ee50c5f --- /dev/null +++ b/src/video_core/host_shaders/astc_decoder_spv_includes.h | |||
| @@ -0,0 +1,20 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include "video_core/host_shaders/astc_decoder_comp_10x10_spv.h" | ||
| 7 | #include "video_core/host_shaders/astc_decoder_comp_10x5_spv.h" | ||
| 8 | #include "video_core/host_shaders/astc_decoder_comp_10x6_spv.h" | ||
| 9 | #include "video_core/host_shaders/astc_decoder_comp_10x8_spv.h" | ||
| 10 | #include "video_core/host_shaders/astc_decoder_comp_12x10_spv.h" | ||
| 11 | #include "video_core/host_shaders/astc_decoder_comp_12x12_spv.h" | ||
| 12 | #include "video_core/host_shaders/astc_decoder_comp_4x4_spv.h" | ||
| 13 | #include "video_core/host_shaders/astc_decoder_comp_5x4_spv.h" | ||
| 14 | #include "video_core/host_shaders/astc_decoder_comp_5x5_spv.h" | ||
| 15 | #include "video_core/host_shaders/astc_decoder_comp_6x5_spv.h" | ||
| 16 | #include "video_core/host_shaders/astc_decoder_comp_6x6_spv.h" | ||
| 17 | #include "video_core/host_shaders/astc_decoder_comp_8x5_spv.h" | ||
| 18 | #include "video_core/host_shaders/astc_decoder_comp_8x6_spv.h" | ||
| 19 | #include "video_core/host_shaders/astc_decoder_comp_8x8_spv.h" | ||
| 20 | #include "video_core/host_shaders/astc_decoder_comp_spv.h" | ||