diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/host_shaders/CMakeLists.txt | 57 | ||||
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 42 | ||||
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder_spv_includes.h | 20 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.cpp | 104 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.h | 2 |
5 files changed, 198 insertions, 27 deletions
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index e61d9af80..20e8388ee 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt | |||
| @@ -13,6 +13,11 @@ set(GLSL_INCLUDES | |||
| 13 | ${FIDELITYFX_FILES} | 13 | ${FIDELITYFX_FILES} |
| 14 | ) | 14 | ) |
| 15 | 15 | ||
| 16 | set(ASTC_INCLUDES | ||
| 17 | # astc_decoder_glsl_includes.h | ||
| 18 | astc_decoder_spv_includes.h | ||
| 19 | ) | ||
| 20 | |||
| 16 | set(SHADER_FILES | 21 | set(SHADER_FILES |
| 17 | astc_decoder.comp | 22 | astc_decoder.comp |
| 18 | blit_color_float.frag | 23 | blit_color_float.frag |
| @@ -95,9 +100,60 @@ if (NOT GLSLANG_ERROR STREQUAL "") | |||
| 95 | set(QUIET_FLAG "") | 100 | set(QUIET_FLAG "") |
| 96 | endif() | 101 | endif() |
| 97 | 102 | ||
| 103 | macro(ASTC_GEN) | ||
| 104 | # paired list of valid astc block dimensions | ||
| 105 | set(ASTC_WIDTHS 4 5 5 6 6 6 8 8 8 10 10 10 10 12 12) | ||
| 106 | set(ASTC_HEIGHTS 4 4 5 4 5 6 5 6 8 5 6 8 10 10 12) | ||
| 107 | list(LENGTH ASTC_WIDTHS NUM_ASTC_FORMATS) | ||
| 108 | math(EXPR NUM_ASTC_FORMATS "${NUM_ASTC_FORMATS}-1") | ||
| 109 | foreach(i RANGE ${NUM_ASTC_FORMATS}) | ||
| 110 | list(GET ASTC_WIDTHS ${i} ASTC_WIDTH) | ||
| 111 | list(GET ASTC_HEIGHTS ${i} ASTC_HEIGHT) | ||
| 112 | |||
| 113 | # Vulkan SPIR-V Specialization | ||
| 114 | |||
| 115 | string(TOUPPER ${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}_SPV SPIRV_VARIABLE_NAME) | ||
| 116 | set(SPIRV_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}_spv.h) | ||
| 117 | add_custom_command( | ||
| 118 | OUTPUT | ||
| 119 | ${SPIRV_HEADER_FILE} | ||
| 120 | COMMAND | ||
| 121 | ${GLSLANGVALIDATOR} -V -DBLOCK_WIDTH=${ASTC_WIDTH} -DBLOCK_HEIGHT=${ASTC_HEIGHT} ${QUIET_FLAG} ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} | ||
| 122 | MAIN_DEPENDENCY | ||
| 123 | ${SOURCE_FILE} | ||
| 124 | ) | ||
| 125 | set(SHADER_HEADERS ${SHADER_HEADERS} ${SPIRV_HEADER_FILE}) | ||
| 126 | |||
| 127 | # GLSL Specialization | ||
| 128 | # Disabled as there was no noticeable performance uplift specializing the shaders for OGL | ||
| 129 | |||
| 130 | # set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}.h) | ||
| 131 | # set(SHADER_DEFINES "#define BLOCK_WIDTH ${ASTC_WIDTH}" "#define BLOCK_HEIGHT ${ASTC_HEIGHT}") | ||
| 132 | # set(DEFINES_LINE_NUMBER 14) | ||
| 133 | # string(TOUPPER ${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT} GLSL_VARIABLE_NAME) | ||
| 134 | # add_custom_command( | ||
| 135 | # OUTPUT | ||
| 136 | # ${SOURCE_HEADER_FILE} | ||
| 137 | # COMMAND | ||
| 138 | # ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE} "${SHADER_DEFINES}" ${DEFINES_LINE_NUMBER} ${GLSL_VARIABLE_NAME} | ||
| 139 | # MAIN_DEPENDENCY | ||
| 140 | # ${SOURCE_FILE} | ||
| 141 | # DEPENDS | ||
| 142 | # ${INPUT_FILE} | ||
| 143 | # ${SOURCE_FILE} | ||
| 144 | # ) | ||
| 145 | # set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE}) | ||
| 146 | endforeach() | ||
| 147 | endmacro() | ||
| 148 | |||
| 98 | foreach(FILENAME IN ITEMS ${SHADER_FILES}) | 149 | foreach(FILENAME IN ITEMS ${SHADER_FILES}) |
| 99 | string(REPLACE "." "_" SHADER_NAME ${FILENAME}) | 150 | string(REPLACE "." "_" SHADER_NAME ${FILENAME}) |
| 100 | set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) | 151 | set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) |
| 152 | |||
| 153 | if (${FILENAME} MATCHES "astc_decoder.comp") | ||
| 154 | ASTC_GEN() | ||
| 155 | endif() | ||
| 156 | |||
| 101 | # Skip generating source headers on Vulkan exclusive files | 157 | # Skip generating source headers on Vulkan exclusive files |
| 102 | if (NOT ${FILENAME} MATCHES "vulkan.*") | 158 | if (NOT ${FILENAME} MATCHES "vulkan.*") |
| 103 | set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) | 159 | set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) |
| @@ -151,6 +207,7 @@ endforeach() | |||
| 151 | 207 | ||
| 152 | set(SHADER_SOURCES ${SHADER_FILES}) | 208 | set(SHADER_SOURCES ${SHADER_FILES}) |
| 153 | list(APPEND SHADER_SOURCES ${GLSL_INCLUDES}) | 209 | list(APPEND SHADER_SOURCES ${GLSL_INCLUDES}) |
| 210 | list(APPEND SHADER_SOURCES ${ASTC_INCLUDES}) | ||
| 154 | 211 | ||
| 155 | add_custom_target(host_shaders | 212 | add_custom_target(host_shaders |
| 156 | DEPENDS | 213 | DEPENDS |
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index a33c916ac..b4bb8299f 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp | |||
| @@ -24,7 +24,9 @@ | |||
| 24 | layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; | 24 | layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; |
| 25 | 25 | ||
| 26 | BEGIN_PUSH_CONSTANTS | 26 | BEGIN_PUSH_CONSTANTS |
| 27 | #ifndef BLOCK_WIDTH | ||
| 27 | UNIFORM(1) uvec2 block_dims; | 28 | UNIFORM(1) uvec2 block_dims; |
| 29 | #endif | ||
| 28 | UNIFORM(2) uint layer_stride; | 30 | UNIFORM(2) uint layer_stride; |
| 29 | UNIFORM(3) uint block_size; | 31 | UNIFORM(3) uint block_size; |
| 30 | UNIFORM(4) uint x_shift; | 32 | UNIFORM(4) uint x_shift; |
| @@ -75,7 +77,15 @@ int color_bitsread = 0; | |||
| 75 | // At most will require BLOCK_WIDTH x BLOCK_HEIGHT x 2 in dual plane mode | 77 | // At most will require BLOCK_WIDTH x BLOCK_HEIGHT x 2 in dual plane mode |
| 76 | // So the maximum would be 144 (12 x 12) elements, x 2 for two planes | 78 | // So the maximum would be 144 (12 x 12) elements, x 2 for two planes |
| 77 | #define DIVCEIL(number, divisor) (number + divisor - 1) / divisor | 79 | #define DIVCEIL(number, divisor) (number + divisor - 1) / divisor |
| 80 | |||
| 81 | #ifndef BLOCK_WIDTH | ||
| 82 | #define BLOCK_WIDTH block_dims.x | ||
| 83 | #define BLOCK_HEIGHT block_dims.y | ||
| 78 | #define ARRAY_NUM_ELEMENTS 144 | 84 | #define ARRAY_NUM_ELEMENTS 144 |
| 85 | #else | ||
| 86 | #define ARRAY_NUM_ELEMENTS BLOCK_WIDTH * BLOCK_HEIGHT | ||
| 87 | #endif | ||
| 88 | |||
| 79 | #define VECTOR_ARRAY_SIZE DIVCEIL(ARRAY_NUM_ELEMENTS * 2, 4) | 89 | #define VECTOR_ARRAY_SIZE DIVCEIL(ARRAY_NUM_ELEMENTS * 2, 4) |
| 80 | uvec4 result_vector[VECTOR_ARRAY_SIZE]; | 90 | uvec4 result_vector[VECTOR_ARRAY_SIZE]; |
| 81 | 91 | ||
| @@ -265,7 +275,7 @@ uint Hash52(uint p) { | |||
| 265 | } | 275 | } |
| 266 | 276 | ||
| 267 | uint Select2DPartition(uint seed, uint x, uint y, uint partition_count) { | 277 | uint Select2DPartition(uint seed, uint x, uint y, uint partition_count) { |
| 268 | if ((block_dims.y * block_dims.x) < 32) { | 278 | if ((BLOCK_WIDTH * BLOCK_HEIGHT) < 32) { |
| 269 | x <<= 1; | 279 | x <<= 1; |
| 270 | y <<= 1; | 280 | y <<= 1; |
| 271 | } | 281 | } |
| @@ -878,8 +888,8 @@ uint UnquantizeTexelWeight(EncodingData val) { | |||
| 878 | uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE]; | 888 | uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE]; |
| 879 | 889 | ||
| 880 | void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { | 890 | void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { |
| 881 | const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1)); | 891 | const uint Ds = uint((BLOCK_WIDTH * 0.5f + 1024) / (BLOCK_WIDTH - 1)); |
| 882 | const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1)); | 892 | const uint Dt = uint((BLOCK_HEIGHT * 0.5f + 1024) / (BLOCK_HEIGHT - 1)); |
| 883 | const uint num_planes = is_dual_plane ? 2 : 1; | 893 | const uint num_planes = is_dual_plane ? 2 : 1; |
| 884 | const uint area = size.x * size.y; | 894 | const uint area = size.x * size.y; |
| 885 | const uint loop_count = min(result_index, area * num_planes); | 895 | const uint loop_count = min(result_index, area * num_planes); |
| @@ -890,8 +900,8 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { | |||
| 890 | UnquantizeTexelWeight(GetEncodingFromVector(itr)); | 900 | UnquantizeTexelWeight(GetEncodingFromVector(itr)); |
| 891 | } | 901 | } |
| 892 | for (uint plane = 0; plane < num_planes; ++plane) { | 902 | for (uint plane = 0; plane < num_planes; ++plane) { |
| 893 | for (uint t = 0; t < block_dims.y; t++) { | 903 | for (uint t = 0; t < BLOCK_HEIGHT; t++) { |
| 894 | for (uint s = 0; s < block_dims.x; s++) { | 904 | for (uint s = 0; s < BLOCK_WIDTH; s++) { |
| 895 | const uint cs = Ds * s; | 905 | const uint cs = Ds * s; |
| 896 | const uint ct = Dt * t; | 906 | const uint ct = Dt * t; |
| 897 | const uint gs = (cs * (size.x - 1) + 32) >> 6; | 907 | const uint gs = (cs * (size.x - 1) + 32) >> 6; |
| @@ -934,7 +944,7 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { | |||
| 934 | VectorIndicesFromBase(offset_base); | 944 | VectorIndicesFromBase(offset_base); |
| 935 | p.w = result_vector[array_index][vector_index]; | 945 | p.w = result_vector[array_index][vector_index]; |
| 936 | } | 946 | } |
| 937 | const uint offset = (t * block_dims.x + s) + ARRAY_NUM_ELEMENTS * plane; | 947 | const uint offset = (t * BLOCK_WIDTH + s) + ARRAY_NUM_ELEMENTS * plane; |
| 938 | const uint array_index = offset / 4; | 948 | const uint array_index = offset / 4; |
| 939 | const uint vector_index = offset % 4; | 949 | const uint vector_index = offset % 4; |
| 940 | unquantized_texel_weights[array_index][vector_index] = (uint(dot(p, w)) + 8) >> 4; | 950 | unquantized_texel_weights[array_index][vector_index] = (uint(dot(p, w)) + 8) >> 4; |
| @@ -976,8 +986,8 @@ int FindLayout(uint mode) { | |||
| 976 | 986 | ||
| 977 | 987 | ||
| 978 | void FillError(ivec3 coord) { | 988 | void FillError(ivec3 coord) { |
| 979 | for (uint j = 0; j < block_dims.y; j++) { | 989 | for (uint j = 0; j < BLOCK_HEIGHT; j++) { |
| 980 | for (uint i = 0; i < block_dims.x; i++) { | 990 | for (uint i = 0; i < BLOCK_WIDTH; i++) { |
| 981 | imageStore(dest_image, coord + ivec3(i, j, 0), vec4(0.0, 0.0, 0.0, 0.0)); | 991 | imageStore(dest_image, coord + ivec3(i, j, 0), vec4(0.0, 0.0, 0.0, 0.0)); |
| 982 | } | 992 | } |
| 983 | } | 993 | } |
| @@ -993,8 +1003,8 @@ void FillVoidExtentLDR(ivec3 coord) { | |||
| 993 | const float r = float(r_u) / 65535.0f; | 1003 | const float r = float(r_u) / 65535.0f; |
| 994 | const float g = float(g_u) / 65535.0f; | 1004 | const float g = float(g_u) / 65535.0f; |
| 995 | const float b = float(b_u) / 65535.0f; | 1005 | const float b = float(b_u) / 65535.0f; |
| 996 | for (uint j = 0; j < block_dims.y; j++) { | 1006 | for (uint j = 0; j < BLOCK_HEIGHT; j++) { |
| 997 | for (uint i = 0; i < block_dims.x; i++) { | 1007 | for (uint i = 0; i < BLOCK_WIDTH; i++) { |
| 998 | imageStore(dest_image, coord + ivec3(i, j, 0), vec4(r, g, b, a)); | 1008 | imageStore(dest_image, coord + ivec3(i, j, 0), vec4(r, g, b, a)); |
| 999 | } | 1009 | } |
| 1000 | } | 1010 | } |
| @@ -1089,7 +1099,7 @@ void DecompressBlock(ivec3 coord) { | |||
| 1089 | return; | 1099 | return; |
| 1090 | } | 1100 | } |
| 1091 | const uvec2 size_params = DecodeBlockSize(mode); | 1101 | const uvec2 size_params = DecodeBlockSize(mode); |
| 1092 | if ((size_params.x > block_dims.x) || (size_params.y > block_dims.y)) { | 1102 | if ((size_params.x > BLOCK_WIDTH) || (size_params.y > BLOCK_HEIGHT)) { |
| 1093 | FillError(coord); | 1103 | FillError(coord); |
| 1094 | return; | 1104 | return; |
| 1095 | } | 1105 | } |
| @@ -1218,21 +1228,21 @@ void DecompressBlock(ivec3 coord) { | |||
| 1218 | DecodeIntegerSequence(max_weight, GetNumWeightValues(size_params, dual_plane)); | 1228 | DecodeIntegerSequence(max_weight, GetNumWeightValues(size_params, dual_plane)); |
| 1219 | 1229 | ||
| 1220 | UnquantizeTexelWeights(size_params, dual_plane); | 1230 | UnquantizeTexelWeights(size_params, dual_plane); |
| 1221 | for (uint j = 0; j < block_dims.y; j++) { | 1231 | for (uint j = 0; j < BLOCK_HEIGHT; j++) { |
| 1222 | for (uint i = 0; i < block_dims.x; i++) { | 1232 | for (uint i = 0; i < BLOCK_WIDTH; i++) { |
| 1223 | uint local_partition = 0; | 1233 | uint local_partition = 0; |
| 1224 | if (num_partitions > 1) { | 1234 | if (num_partitions > 1) { |
| 1225 | local_partition = Select2DPartition(partition_index, i, j, num_partitions); | 1235 | local_partition = Select2DPartition(partition_index, i, j, num_partitions); |
| 1226 | } | 1236 | } |
| 1227 | const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]); | 1237 | const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]); |
| 1228 | const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]); | 1238 | const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]); |
| 1229 | const uint weight_offset = (j * block_dims.x + i); | 1239 | const uint weight_offset = (j * BLOCK_WIDTH + i); |
| 1230 | const uint array_index = weight_offset / 4; | 1240 | const uint array_index = weight_offset / 4; |
| 1231 | const uint vector_index = weight_offset % 4; | 1241 | const uint vector_index = weight_offset % 4; |
| 1232 | const uint primary_weight = unquantized_texel_weights[array_index][vector_index]; | 1242 | const uint primary_weight = unquantized_texel_weights[array_index][vector_index]; |
| 1233 | uvec4 weight_vec = uvec4(primary_weight); | 1243 | uvec4 weight_vec = uvec4(primary_weight); |
| 1234 | if (dual_plane) { | 1244 | if (dual_plane) { |
| 1235 | const uint secondary_weight_offset = (j * block_dims.x + i) + ARRAY_NUM_ELEMENTS; | 1245 | const uint secondary_weight_offset = (j * BLOCK_WIDTH + i) + ARRAY_NUM_ELEMENTS; |
| 1236 | const uint secondary_array_index = secondary_weight_offset / 4; | 1246 | const uint secondary_array_index = secondary_weight_offset / 4; |
| 1237 | const uint secondary_vector_index = secondary_weight_offset % 4; | 1247 | const uint secondary_vector_index = secondary_weight_offset % 4; |
| 1238 | const uint secondary_weight = | 1248 | const uint secondary_weight = |
| @@ -1270,7 +1280,7 @@ void main() { | |||
| 1270 | offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; | 1280 | offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; |
| 1271 | offset += swizzle; | 1281 | offset += swizzle; |
| 1272 | 1282 | ||
| 1273 | const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1)); | 1283 | const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(BLOCK_WIDTH, BLOCK_HEIGHT, 1)); |
| 1274 | if (any(greaterThanEqual(coord, imageSize(dest_image)))) { | 1284 | if (any(greaterThanEqual(coord, imageSize(dest_image)))) { |
| 1275 | return; | 1285 | return; |
| 1276 | } | 1286 | } |
diff --git a/src/video_core/host_shaders/astc_decoder_spv_includes.h b/src/video_core/host_shaders/astc_decoder_spv_includes.h new file mode 100644 index 000000000..44ee50c5f --- /dev/null +++ b/src/video_core/host_shaders/astc_decoder_spv_includes.h | |||
| @@ -0,0 +1,20 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include "video_core/host_shaders/astc_decoder_comp_10x10_spv.h" | ||
| 7 | #include "video_core/host_shaders/astc_decoder_comp_10x5_spv.h" | ||
| 8 | #include "video_core/host_shaders/astc_decoder_comp_10x6_spv.h" | ||
| 9 | #include "video_core/host_shaders/astc_decoder_comp_10x8_spv.h" | ||
| 10 | #include "video_core/host_shaders/astc_decoder_comp_12x10_spv.h" | ||
| 11 | #include "video_core/host_shaders/astc_decoder_comp_12x12_spv.h" | ||
| 12 | #include "video_core/host_shaders/astc_decoder_comp_4x4_spv.h" | ||
| 13 | #include "video_core/host_shaders/astc_decoder_comp_5x4_spv.h" | ||
| 14 | #include "video_core/host_shaders/astc_decoder_comp_5x5_spv.h" | ||
| 15 | #include "video_core/host_shaders/astc_decoder_comp_6x5_spv.h" | ||
| 16 | #include "video_core/host_shaders/astc_decoder_comp_6x6_spv.h" | ||
| 17 | #include "video_core/host_shaders/astc_decoder_comp_8x5_spv.h" | ||
| 18 | #include "video_core/host_shaders/astc_decoder_comp_8x6_spv.h" | ||
| 19 | #include "video_core/host_shaders/astc_decoder_comp_8x8_spv.h" | ||
| 20 | #include "video_core/host_shaders/astc_decoder_comp_spv.h" | ||
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 54ee030ce..f22342252 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -11,7 +11,7 @@ | |||
| 11 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "common/div_ceil.h" | 13 | #include "common/div_ceil.h" |
| 14 | #include "video_core/host_shaders/astc_decoder_comp_spv.h" | 14 | #include "video_core/host_shaders/astc_decoder_spv_includes.h" |
| 15 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" | 15 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" |
| 16 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" | 16 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" |
| 17 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 17 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| @@ -124,13 +124,62 @@ constexpr std::array<VkDescriptorUpdateTemplateEntry, ASTC_NUM_BINDINGS> | |||
| 124 | }}; | 124 | }}; |
| 125 | 125 | ||
| 126 | struct AstcPushConstants { | 126 | struct AstcPushConstants { |
| 127 | std::array<u32, 2> blocks_dims; | ||
| 128 | u32 layer_stride; | 127 | u32 layer_stride; |
| 129 | u32 block_size; | 128 | u32 block_size; |
| 130 | u32 x_shift; | 129 | u32 x_shift; |
| 131 | u32 block_height; | 130 | u32 block_height; |
| 132 | u32 block_height_mask; | 131 | u32 block_height_mask; |
| 133 | }; | 132 | }; |
| 133 | |||
| 134 | size_t AstcFormatIndex(VideoCore::Surface::PixelFormat format) { | ||
| 135 | switch (format) { | ||
| 136 | case VideoCore::Surface::PixelFormat::ASTC_2D_4X4_SRGB: | ||
| 137 | case VideoCore::Surface::PixelFormat::ASTC_2D_4X4_UNORM: | ||
| 138 | return 0; | ||
| 139 | case VideoCore::Surface::PixelFormat::ASTC_2D_5X4_SRGB: | ||
| 140 | case VideoCore::Surface::PixelFormat::ASTC_2D_5X4_UNORM: | ||
| 141 | return 1; | ||
| 142 | case VideoCore::Surface::PixelFormat::ASTC_2D_5X5_SRGB: | ||
| 143 | case VideoCore::Surface::PixelFormat::ASTC_2D_5X5_UNORM: | ||
| 144 | return 2; | ||
| 145 | case VideoCore::Surface::PixelFormat::ASTC_2D_6X5_SRGB: | ||
| 146 | case VideoCore::Surface::PixelFormat::ASTC_2D_6X5_UNORM: | ||
| 147 | return 3; | ||
| 148 | case VideoCore::Surface::PixelFormat::ASTC_2D_6X6_SRGB: | ||
| 149 | case VideoCore::Surface::PixelFormat::ASTC_2D_6X6_UNORM: | ||
| 150 | return 4; | ||
| 151 | case VideoCore::Surface::PixelFormat::ASTC_2D_8X5_SRGB: | ||
| 152 | case VideoCore::Surface::PixelFormat::ASTC_2D_8X5_UNORM: | ||
| 153 | return 5; | ||
| 154 | case VideoCore::Surface::PixelFormat::ASTC_2D_8X6_SRGB: | ||
| 155 | case VideoCore::Surface::PixelFormat::ASTC_2D_8X6_UNORM: | ||
| 156 | return 6; | ||
| 157 | case VideoCore::Surface::PixelFormat::ASTC_2D_8X8_SRGB: | ||
| 158 | case VideoCore::Surface::PixelFormat::ASTC_2D_8X8_UNORM: | ||
| 159 | return 7; | ||
| 160 | case VideoCore::Surface::PixelFormat::ASTC_2D_10X5_SRGB: | ||
| 161 | case VideoCore::Surface::PixelFormat::ASTC_2D_10X5_UNORM: | ||
| 162 | return 8; | ||
| 163 | case VideoCore::Surface::PixelFormat::ASTC_2D_10X6_SRGB: | ||
| 164 | case VideoCore::Surface::PixelFormat::ASTC_2D_10X6_UNORM: | ||
| 165 | return 9; | ||
| 166 | case VideoCore::Surface::PixelFormat::ASTC_2D_10X8_SRGB: | ||
| 167 | case VideoCore::Surface::PixelFormat::ASTC_2D_10X8_UNORM: | ||
| 168 | return 10; | ||
| 169 | case VideoCore::Surface::PixelFormat::ASTC_2D_10X10_SRGB: | ||
| 170 | case VideoCore::Surface::PixelFormat::ASTC_2D_10X10_UNORM: | ||
| 171 | return 11; | ||
| 172 | case VideoCore::Surface::PixelFormat::ASTC_2D_12X10_SRGB: | ||
| 173 | case VideoCore::Surface::PixelFormat::ASTC_2D_12X10_UNORM: | ||
| 174 | return 12; | ||
| 175 | case VideoCore::Surface::PixelFormat::ASTC_2D_12X12_SRGB: | ||
| 176 | case VideoCore::Surface::PixelFormat::ASTC_2D_12X12_UNORM: | ||
| 177 | return 13; | ||
| 178 | default: | ||
| 179 | UNREACHABLE(); | ||
| 180 | return 0; | ||
| 181 | } | ||
| 182 | } | ||
| 134 | } // Anonymous namespace | 183 | } // Anonymous namespace |
| 135 | 184 | ||
| 136 | ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, | 185 | ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, |
| @@ -312,19 +361,53 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, | |||
| 312 | COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV), | 361 | COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV), |
| 313 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, | 362 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, |
| 314 | compute_pass_descriptor_queue{compute_pass_descriptor_queue_}, memory_allocator{ | 363 | compute_pass_descriptor_queue{compute_pass_descriptor_queue_}, memory_allocator{ |
| 315 | memory_allocator_} {} | 364 | memory_allocator_} { |
| 365 | // These must match the order found in AstcFormatIndex | ||
| 366 | static constexpr std::array<std::span<const u32>, 14> ASTC_SHADERS{ | ||
| 367 | ASTC_DECODER_COMP_4X4_SPV, ASTC_DECODER_COMP_5X4_SPV, ASTC_DECODER_COMP_5X5_SPV, | ||
| 368 | ASTC_DECODER_COMP_6X5_SPV, ASTC_DECODER_COMP_6X6_SPV, ASTC_DECODER_COMP_8X5_SPV, | ||
| 369 | ASTC_DECODER_COMP_8X6_SPV, ASTC_DECODER_COMP_8X8_SPV, ASTC_DECODER_COMP_10X5_SPV, | ||
| 370 | ASTC_DECODER_COMP_10X6_SPV, ASTC_DECODER_COMP_10X8_SPV, ASTC_DECODER_COMP_10X10_SPV, | ||
| 371 | ASTC_DECODER_COMP_12X10_SPV, ASTC_DECODER_COMP_12X12_SPV, | ||
| 372 | }; | ||
| 373 | for (size_t index = 0; index < ASTC_SHADERS.size(); ++index) { | ||
| 374 | const auto& code = ASTC_SHADERS[index]; | ||
| 375 | const auto module_ = device.GetLogical().CreateShaderModule({ | ||
| 376 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, | ||
| 377 | .pNext = nullptr, | ||
| 378 | .flags = 0, | ||
| 379 | .codeSize = static_cast<u32>(code.size_bytes()), | ||
| 380 | .pCode = code.data(), | ||
| 381 | }); | ||
| 382 | device.SaveShader(code); | ||
| 383 | astc_pipelines[index] = device.GetLogical().CreateComputePipeline({ | ||
| 384 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | ||
| 385 | .pNext = nullptr, | ||
| 386 | .flags = 0, | ||
| 387 | .stage{ | ||
| 388 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | ||
| 389 | .pNext = nullptr, | ||
| 390 | .flags = 0, | ||
| 391 | .stage = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 392 | .module = *module_, | ||
| 393 | .pName = "main", | ||
| 394 | .pSpecializationInfo = nullptr, | ||
| 395 | }, | ||
| 396 | .layout = *layout, | ||
| 397 | .basePipelineHandle = nullptr, | ||
| 398 | .basePipelineIndex = 0, | ||
| 399 | }); | ||
| 400 | } | ||
| 401 | } | ||
| 316 | 402 | ||
| 317 | ASTCDecoderPass::~ASTCDecoderPass() = default; | 403 | ASTCDecoderPass::~ASTCDecoderPass() = default; |
| 318 | 404 | ||
| 319 | void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | 405 | void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, |
| 320 | std::span<const VideoCommon::SwizzleParameters> swizzles) { | 406 | std::span<const VideoCommon::SwizzleParameters> swizzles) { |
| 321 | using namespace VideoCommon::Accelerated; | 407 | using namespace VideoCommon::Accelerated; |
| 322 | const std::array<u32, 2> block_dims{ | 408 | |
| 323 | VideoCore::Surface::DefaultBlockWidth(image.info.format), | ||
| 324 | VideoCore::Surface::DefaultBlockHeight(image.info.format), | ||
| 325 | }; | ||
| 326 | scheduler.RequestOutsideRenderPassOperationContext(); | 409 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 327 | const VkPipeline vk_pipeline = *pipeline; | 410 | const VkPipeline vk_pipeline = *astc_pipelines[AstcFormatIndex(image.info.format)]; |
| 328 | const VkImageAspectFlags aspect_mask = image.AspectMask(); | 411 | const VkImageAspectFlags aspect_mask = image.AspectMask(); |
| 329 | const VkImage vk_image = image.Handle(); | 412 | const VkImage vk_image = image.Handle(); |
| 330 | const bool is_initialized = image.ExchangeInitialization(); | 413 | const bool is_initialized = image.ExchangeInitialization(); |
| @@ -371,10 +454,9 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | |||
| 371 | ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); | 454 | ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); |
| 372 | ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); | 455 | ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); |
| 373 | ASSERT(params.bytes_per_block_log2 == 4); | 456 | ASSERT(params.bytes_per_block_log2 == 4); |
| 374 | scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims, | 457 | scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, params, |
| 375 | params, descriptor_data](vk::CommandBuffer cmdbuf) { | 458 | descriptor_data](vk::CommandBuffer cmdbuf) { |
| 376 | const AstcPushConstants uniforms{ | 459 | const AstcPushConstants uniforms{ |
| 377 | .blocks_dims = block_dims, | ||
| 378 | .layer_stride = params.layer_stride, | 460 | .layer_stride = params.layer_stride, |
| 379 | .block_size = params.block_size, | 461 | .block_size = params.block_size, |
| 380 | .x_shift = params.x_shift, | 462 | .x_shift = params.x_shift, |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index dd3927376..1f264bea6 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h | |||
| @@ -95,6 +95,8 @@ public: | |||
| 95 | std::span<const VideoCommon::SwizzleParameters> swizzles); | 95 | std::span<const VideoCommon::SwizzleParameters> swizzles); |
| 96 | 96 | ||
| 97 | private: | 97 | private: |
| 98 | std::array<vk::Pipeline, 14> astc_pipelines; | ||
| 99 | |||
| 98 | Scheduler& scheduler; | 100 | Scheduler& scheduler; |
| 99 | StagingBufferPool& staging_buffer_pool; | 101 | StagingBufferPool& staging_buffer_pool; |
| 100 | ComputePassDescriptorQueue& compute_pass_descriptor_queue; | 102 | ComputePassDescriptorQueue& compute_pass_descriptor_queue; |