diff options
| author | 2023-08-02 18:15:59 -0400 | |
|---|---|---|
| committer | 2023-08-06 14:54:58 -0400 | |
| commit | 8ce158bce6912b2263f1724e6c09d8b517ef18c3 (patch) | |
| tree | ded2c5ccef3c5def9b4ce03b510c5da8e5ea48a9 | |
| parent | vulkan dims specialization (diff) | |
| download | yuzu-8ce158bce6912b2263f1724e6c09d8b517ef18c3.tar.gz yuzu-8ce158bce6912b2263f1724e6c09d8b517ef18c3.tar.xz yuzu-8ce158bce6912b2263f1724e6c09d8b517ef18c3.zip | |
Revert "vulkan dims specialization"
This reverts commit e6243058f2269bd79ac8479d58e55feec2611e9d.
| -rw-r--r-- | src/video_core/host_shaders/CMakeLists.txt | 57 | ||||
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 42 | ||||
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder_spv_includes.h | 20 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.cpp | 104 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.h | 2 |
5 files changed, 27 insertions, 198 deletions
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 20e8388ee..e61d9af80 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
| @@ -13,11 +13,6 @@ set(GLSL_INCLUDES | |||
| 13 | ${FIDELITYFX_FILES} | 13 | ${FIDELITYFX_FILES} |
| 14 | ) | 14 | ) |
| 15 | 15 | ||
| 16 | set(ASTC_INCLUDES | ||
| 17 | # astc_decoder_glsl_includes.h | ||
| 18 | astc_decoder_spv_includes.h | ||
| 19 | ) | ||
| 20 | |||
| 21 | set(SHADER_FILES | 16 | set(SHADER_FILES |
| 22 | astc_decoder.comp | 17 | astc_decoder.comp |
| 23 | blit_color_float.frag | 18 | blit_color_float.frag |
| @@ -100,60 +95,9 @@ if (NOT GLSLANG_ERROR STREQUAL "") | |||
| 100 | set(QUIET_FLAG "") | 95 | set(QUIET_FLAG "") |
| 101 | endif() | 96 | endif() |
| 102 | 97 | ||
| 103 | macro(ASTC_GEN) | ||
| 104 | # paired list of valid astc block dimensions | ||
| 105 | set(ASTC_WIDTHS 4 5 5 6 6 6 8 8 8 10 10 10 10 12 12) | ||
| 106 | set(ASTC_HEIGHTS 4 4 5 4 5 6 5 6 8 5 6 8 10 10 12) | ||
| 107 | list(LENGTH ASTC_WIDTHS NUM_ASTC_FORMATS) | ||
| 108 | math(EXPR NUM_ASTC_FORMATS "${NUM_ASTC_FORMATS}-1") | ||
| 109 | foreach(i RANGE ${NUM_ASTC_FORMATS}) | ||
| 110 | list(GET ASTC_WIDTHS ${i} ASTC_WIDTH) | ||
| 111 | list(GET ASTC_HEIGHTS ${i} ASTC_HEIGHT) | ||
| 112 | |||
| 113 | # Vulkan SPIR-V Specialization | ||
| 114 | |||
| 115 | string(TOUPPER ${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}_SPV SPIRV_VARIABLE_NAME) | ||
| 116 | set(SPIRV_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}_spv.h) | ||
| 117 | add_custom_command( | ||
| 118 | OUTPUT | ||
| 119 | ${SPIRV_HEADER_FILE} | ||
| 120 | COMMAND | ||
| 121 | ${GLSLANGVALIDATOR} -V -DBLOCK_WIDTH=${ASTC_WIDTH} -DBLOCK_HEIGHT=${ASTC_HEIGHT} ${QUIET_FLAG} ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} | ||
| 122 | MAIN_DEPENDENCY | ||
| 123 | ${SOURCE_FILE} | ||
| 124 | ) | ||
| 125 | set(SHADER_HEADERS ${SHADER_HEADERS} ${SPIRV_HEADER_FILE}) | ||
| 126 | |||
| 127 | # GLSL Specialization | ||
| 128 | # Disabled as there was no noticeable performance uplift specializing the shaders for OGL | ||
| 129 | |||
| 130 | # set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}.h) | ||
| 131 | # set(SHADER_DEFINES "#define BLOCK_WIDTH ${ASTC_WIDTH}" "#define BLOCK_HEIGHT ${ASTC_HEIGHT}") | ||
| 132 | # set(DEFINES_LINE_NUMBER 14) | ||
| 133 | # string(TOUPPER ${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT} GLSL_VARIABLE_NAME) | ||
| 134 | # add_custom_command( | ||
| 135 | # OUTPUT | ||
| 136 | # ${SOURCE_HEADER_FILE} | ||
| 137 | # COMMAND | ||
| 138 | # ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE} "${SHADER_DEFINES}" ${DEFINES_LINE_NUMBER} ${GLSL_VARIABLE_NAME} | ||
| 139 | # MAIN_DEPENDENCY | ||
| 140 | # ${SOURCE_FILE} | ||
| 141 | # DEPENDS | ||
| 142 | # ${INPUT_FILE} | ||
| 143 | # ${SOURCE_FILE} | ||
| 144 | # ) | ||
| 145 | # set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE}) | ||
| 146 | endforeach() | ||
| 147 | endmacro() | ||
| 148 | |||
| 149 | foreach(FILENAME IN ITEMS ${SHADER_FILES}) | 98 | foreach(FILENAME IN ITEMS ${SHADER_FILES}) |
| 150 | string(REPLACE "." "_" SHADER_NAME ${FILENAME}) | 99 | string(REPLACE "." "_" SHADER_NAME ${FILENAME}) |
| 151 | set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) | 100 | set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) |
| 152 | |||
| 153 | if (${FILENAME} MATCHES "astc_decoder.comp") | ||
| 154 | ASTC_GEN() | ||
| 155 | endif() | ||
| 156 | |||
| 157 | # Skip generating source headers on Vulkan exclusive files | 101 | # Skip generating source headers on Vulkan exclusive files |
| 158 | if (NOT ${FILENAME} MATCHES "vulkan.*") | 102 | if (NOT ${FILENAME} MATCHES "vulkan.*") |
| 159 | set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) | 103 | set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) |
| @@ -207,7 +151,6 @@ endforeach() | |||
| 207 | 151 | ||
| 208 | set(SHADER_SOURCES ${SHADER_FILES}) | 152 | set(SHADER_SOURCES ${SHADER_FILES}) |
| 209 | list(APPEND SHADER_SOURCES ${GLSL_INCLUDES}) | 153 | list(APPEND SHADER_SOURCES ${GLSL_INCLUDES}) |
| 210 | list(APPEND SHADER_SOURCES ${ASTC_INCLUDES}) | ||
| 211 | 154 | ||
| 212 | add_custom_target(host_shaders | 155 | add_custom_target(host_shaders |
| 213 | DEPENDS | 156 | DEPENDS |
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index b4bb8299f..a33c916ac 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
| @@ -24,9 +24,7 @@ | |||
| 24 | layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; | 24 | layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; |
| 25 | 25 | ||
| 26 | BEGIN_PUSH_CONSTANTS | 26 | BEGIN_PUSH_CONSTANTS |
| 27 | #ifndef BLOCK_WIDTH | ||
| 28 | UNIFORM(1) uvec2 block_dims; | 27 | UNIFORM(1) uvec2 block_dims; |
| 29 | #endif | ||
| 30 | UNIFORM(2) uint layer_stride; | 28 | UNIFORM(2) uint layer_stride; |
| 31 | UNIFORM(3) uint block_size; | 29 | UNIFORM(3) uint block_size; |
| 32 | UNIFORM(4) uint x_shift; | 30 | UNIFORM(4) uint x_shift; |
| @@ -77,15 +75,7 @@ int color_bitsread = 0; | |||
| 77 | // At most will require BLOCK_WIDTH x BLOCK_HEIGHT x 2 in dual plane mode | 75 | // At most will require BLOCK_WIDTH x BLOCK_HEIGHT x 2 in dual plane mode |
| 78 | // So the maximum would be 144 (12 x 12) elements, x 2 for two planes | 76 | // So the maximum would be 144 (12 x 12) elements, x 2 for two planes |
| 79 | #define DIVCEIL(number, divisor) (number + divisor - 1) / divisor | 77 | #define DIVCEIL(number, divisor) (number + divisor - 1) / divisor |
| 80 | |||
| 81 | #ifndef BLOCK_WIDTH | ||
| 82 | #define BLOCK_WIDTH block_dims.x | ||
| 83 | #define BLOCK_HEIGHT block_dims.y | ||
| 84 | #define ARRAY_NUM_ELEMENTS 144 | 78 | #define ARRAY_NUM_ELEMENTS 144 |
| 85 | #else | ||
| 86 | #define ARRAY_NUM_ELEMENTS BLOCK_WIDTH * BLOCK_HEIGHT | ||
| 87 | #endif | ||
| 88 | |||
| 89 | #define VECTOR_ARRAY_SIZE DIVCEIL(ARRAY_NUM_ELEMENTS * 2, 4) | 79 | #define VECTOR_ARRAY_SIZE DIVCEIL(ARRAY_NUM_ELEMENTS * 2, 4) |
| 90 | uvec4 result_vector[VECTOR_ARRAY_SIZE]; | 80 | uvec4 result_vector[VECTOR_ARRAY_SIZE]; |
| 91 | 81 | ||
| @@ -275,7 +265,7 @@ uint Hash52(uint p) { | |||
| 275 | } | 265 | } |
| 276 | 266 | ||
| 277 | uint Select2DPartition(uint seed, uint x, uint y, uint partition_count) { | 267 | uint Select2DPartition(uint seed, uint x, uint y, uint partition_count) { |
| 278 | if ((BLOCK_WIDTH * BLOCK_HEIGHT) < 32) { | 268 | if ((block_dims.y * block_dims.x) < 32) { |
| 279 | x <<= 1; | 269 | x <<= 1; |
| 280 | y <<= 1; | 270 | y <<= 1; |
| 281 | } | 271 | } |
| @@ -888,8 +878,8 @@ uint UnquantizeTexelWeight(EncodingData val) { | |||
| 888 | uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE]; | 878 | uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE]; |
| 889 | 879 | ||
| 890 | void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { | 880 | void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { |
| 891 | const uint Ds = uint((BLOCK_WIDTH * 0.5f + 1024) / (BLOCK_WIDTH - 1)); | 881 | const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1)); |
| 892 | const uint Dt = uint((BLOCK_HEIGHT * 0.5f + 1024) / (BLOCK_HEIGHT - 1)); | 882 | const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1)); |
| 893 | const uint num_planes = is_dual_plane ? 2 : 1; | 883 | const uint num_planes = is_dual_plane ? 2 : 1; |
| 894 | const uint area = size.x * size.y; | 884 | const uint area = size.x * size.y; |
| 895 | const uint loop_count = min(result_index, area * num_planes); | 885 | const uint loop_count = min(result_index, area * num_planes); |
| @@ -900,8 +890,8 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { | |||
| 900 | UnquantizeTexelWeight(GetEncodingFromVector(itr)); | 890 | UnquantizeTexelWeight(GetEncodingFromVector(itr)); |
| 901 | } | 891 | } |
| 902 | for (uint plane = 0; plane < num_planes; ++plane) { | 892 | for (uint plane = 0; plane < num_planes; ++plane) { |
| 903 | for (uint t = 0; t < BLOCK_HEIGHT; t++) { | 893 | for (uint t = 0; t < block_dims.y; t++) { |
| 904 | for (uint s = 0; s < BLOCK_WIDTH; s++) { | 894 | for (uint s = 0; s < block_dims.x; s++) { |
| 905 | const uint cs = Ds * s; | 895 | const uint cs = Ds * s; |
| 906 | const uint ct = Dt * t; | 896 | const uint ct = Dt * t; |
| 907 | const uint gs = (cs * (size.x - 1) + 32) >> 6; | 897 | const uint gs = (cs * (size.x - 1) + 32) >> 6; |
| @@ -944,7 +934,7 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { | |||
| 944 | VectorIndicesFromBase(offset_base); | 934 | VectorIndicesFromBase(offset_base); |
| 945 | p.w = result_vector[array_index][vector_index]; | 935 | p.w = result_vector[array_index][vector_index]; |
| 946 | } | 936 | } |
| 947 | const uint offset = (t * BLOCK_WIDTH + s) + ARRAY_NUM_ELEMENTS * plane; | 937 | const uint offset = (t * block_dims.x + s) + ARRAY_NUM_ELEMENTS * plane; |
| 948 | const uint array_index = offset / 4; | 938 | const uint array_index = offset / 4; |
| 949 | const uint vector_index = offset % 4; | 939 | const uint vector_index = offset % 4; |
| 950 | unquantized_texel_weights[array_index][vector_index] = (uint(dot(p, w)) + 8) >> 4; | 940 | unquantized_texel_weights[array_index][vector_index] = (uint(dot(p, w)) + 8) >> 4; |
| @@ -986,8 +976,8 @@ int FindLayout(uint mode) { | |||
| 986 | 976 | ||
| 987 | 977 | ||
| 988 | void FillError(ivec3 coord) { | 978 | void FillError(ivec3 coord) { |
| 989 | for (uint j = 0; j < BLOCK_HEIGHT; j++) { | 979 | for (uint j = 0; j < block_dims.y; j++) { |
| 990 | for (uint i = 0; i < BLOCK_WIDTH; i++) { | 980 | for (uint i = 0; i < block_dims.x; i++) { |
| 991 | imageStore(dest_image, coord + ivec3(i, j, 0), vec4(0.0, 0.0, 0.0, 0.0)); | 981 | imageStore(dest_image, coord + ivec3(i, j, 0), vec4(0.0, 0.0, 0.0, 0.0)); |
| 992 | } | 982 | } |
| 993 | } | 983 | } |
| @@ -1003,8 +993,8 @@ void FillVoidExtentLDR(ivec3 coord) { | |||
| 1003 | const float r = float(r_u) / 65535.0f; | 993 | const float r = float(r_u) / 65535.0f; |
| 1004 | const float g = float(g_u) / 65535.0f; | 994 | const float g = float(g_u) / 65535.0f; |
| 1005 | const float b = float(b_u) / 65535.0f; | 995 | const float b = float(b_u) / 65535.0f; |
| 1006 | for (uint j = 0; j < BLOCK_HEIGHT; j++) { | 996 | for (uint j = 0; j < block_dims.y; j++) { |
| 1007 | for (uint i = 0; i < BLOCK_WIDTH; i++) { | 997 | for (uint i = 0; i < block_dims.x; i++) { |
| 1008 | imageStore(dest_image, coord + ivec3(i, j, 0), vec4(r, g, b, a)); | 998 | imageStore(dest_image, coord + ivec3(i, j, 0), vec4(r, g, b, a)); |
| 1009 | } | 999 | } |
| 1010 | } | 1000 | } |
| @@ -1099,7 +1089,7 @@ void DecompressBlock(ivec3 coord) { | |||
| 1099 | return; | 1089 | return; |
| 1100 | } | 1090 | } |
| 1101 | const uvec2 size_params = DecodeBlockSize(mode); | 1091 | const uvec2 size_params = DecodeBlockSize(mode); |
| 1102 | if ((size_params.x > BLOCK_WIDTH) || (size_params.y > BLOCK_HEIGHT)) { | 1092 | if ((size_params.x > block_dims.x) || (size_params.y > block_dims.y)) { |
| 1103 | FillError(coord); | 1093 | FillError(coord); |
| 1104 | return; | 1094 | return; |
| 1105 | } | 1095 | } |
| @@ -1228,21 +1218,21 @@ void DecompressBlock(ivec3 coord) { | |||
| 1228 | DecodeIntegerSequence(max_weight, GetNumWeightValues(size_params, dual_plane)); | 1218 | DecodeIntegerSequence(max_weight, GetNumWeightValues(size_params, dual_plane)); |
| 1229 | 1219 | ||
| 1230 | UnquantizeTexelWeights(size_params, dual_plane); | 1220 | UnquantizeTexelWeights(size_params, dual_plane); |
| 1231 | for (uint j = 0; j < BLOCK_HEIGHT; j++) { | 1221 | for (uint j = 0; j < block_dims.y; j++) { |
| 1232 | for (uint i = 0; i < BLOCK_WIDTH; i++) { | 1222 | for (uint i = 0; i < block_dims.x; i++) { |
| 1233 | uint local_partition = 0; | 1223 | uint local_partition = 0; |
| 1234 | if (num_partitions > 1) { | 1224 | if (num_partitions > 1) { |
| 1235 | local_partition = Select2DPartition(partition_index, i, j, num_partitions); | 1225 | local_partition = Select2DPartition(partition_index, i, j, num_partitions); |
| 1236 | } | 1226 | } |
| 1237 | const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]); | 1227 | const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]); |
| 1238 | const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]); | 1228 | const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]); |
| 1239 | const uint weight_offset = (j * BLOCK_WIDTH + i); | 1229 | const uint weight_offset = (j * block_dims.x + i); |
| 1240 | const uint array_index = weight_offset / 4; | 1230 | const uint array_index = weight_offset / 4; |
| 1241 | const uint vector_index = weight_offset % 4; | 1231 | const uint vector_index = weight_offset % 4; |
| 1242 | const uint primary_weight = unquantized_texel_weights[array_index][vector_index]; | 1232 | const uint primary_weight = unquantized_texel_weights[array_index][vector_index]; |
| 1243 | uvec4 weight_vec = uvec4(primary_weight); | 1233 | uvec4 weight_vec = uvec4(primary_weight); |
| 1244 | if (dual_plane) { | 1234 | if (dual_plane) { |
| 1245 | const uint secondary_weight_offset = (j * BLOCK_WIDTH + i) + ARRAY_NUM_ELEMENTS; | 1235 | const uint secondary_weight_offset = (j * block_dims.x + i) + ARRAY_NUM_ELEMENTS; |
| 1246 | const uint secondary_array_index = secondary_weight_offset / 4; | 1236 | const uint secondary_array_index = secondary_weight_offset / 4; |
| 1247 | const uint secondary_vector_index = secondary_weight_offset % 4; | 1237 | const uint secondary_vector_index = secondary_weight_offset % 4; |
| 1248 | const uint secondary_weight = | 1238 | const uint secondary_weight = |
| @@ -1280,7 +1270,7 @@ void main() { | |||
| 1280 | offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; | 1270 | offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; |
| 1281 | offset += swizzle; | 1271 | offset += swizzle; |
| 1282 | 1272 | ||
| 1283 | const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(BLOCK_WIDTH, BLOCK_HEIGHT, 1)); | 1273 | const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1)); |
| 1284 | if (any(greaterThanEqual(coord, imageSize(dest_image)))) { | 1274 | if (any(greaterThanEqual(coord, imageSize(dest_image)))) { |
| 1285 | return; | 1275 | return; |
| 1286 | } | 1276 | } |
diff --git a/src/video_core/host_shaders/astc_decoder_spv_includes.h b/src/video_core/host_shaders/astc_decoder_spv_includes.h
deleted file mode 100644
index 44ee50c5f..000000000
--- a/src/video_core/host_shaders/astc_decoder_spv_includes.h
+++ /dev/null
| @@ -1,20 +0,0 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include "video_core/host_shaders/astc_decoder_comp_10x10_spv.h" | ||
| 7 | #include "video_core/host_shaders/astc_decoder_comp_10x5_spv.h" | ||
| 8 | #include "video_core/host_shaders/astc_decoder_comp_10x6_spv.h" | ||
| 9 | #include "video_core/host_shaders/astc_decoder_comp_10x8_spv.h" | ||
| 10 | #include "video_core/host_shaders/astc_decoder_comp_12x10_spv.h" | ||
| 11 | #include "video_core/host_shaders/astc_decoder_comp_12x12_spv.h" | ||
| 12 | #include "video_core/host_shaders/astc_decoder_comp_4x4_spv.h" | ||
| 13 | #include "video_core/host_shaders/astc_decoder_comp_5x4_spv.h" | ||
| 14 | #include "video_core/host_shaders/astc_decoder_comp_5x5_spv.h" | ||
| 15 | #include "video_core/host_shaders/astc_decoder_comp_6x5_spv.h" | ||
| 16 | #include "video_core/host_shaders/astc_decoder_comp_6x6_spv.h" | ||
| 17 | #include "video_core/host_shaders/astc_decoder_comp_8x5_spv.h" | ||
| 18 | #include "video_core/host_shaders/astc_decoder_comp_8x6_spv.h" | ||
| 19 | #include "video_core/host_shaders/astc_decoder_comp_8x8_spv.h" | ||
| 20 | #include "video_core/host_shaders/astc_decoder_comp_spv.h" | ||
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index f22342252..54ee030ce 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
| @@ -11,7 +11,7 @@ | |||
| 11 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "common/div_ceil.h" | 13 | #include "common/div_ceil.h" |
| 14 | #include "video_core/host_shaders/astc_decoder_spv_includes.h" | 14 | #include "video_core/host_shaders/astc_decoder_comp_spv.h" |
| 15 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" | 15 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" |
| 16 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" | 16 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" |
| 17 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 17 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| @@ -124,62 +124,13 @@ constexpr std::array<VkDescriptorUpdateTemplateEntry, ASTC_NUM_BINDINGS> | |||
| 124 | }}; | 124 | }}; |
| 125 | 125 | ||
| 126 | struct AstcPushConstants { | 126 | struct AstcPushConstants { |
| 127 | std::array<u32, 2> blocks_dims; | ||
| 127 | u32 layer_stride; | 128 | u32 layer_stride; |
| 128 | u32 block_size; | 129 | u32 block_size; |
| 129 | u32 x_shift; | 130 | u32 x_shift; |
| 130 | u32 block_height; | 131 | u32 block_height; |
| 131 | u32 block_height_mask; | 132 | u32 block_height_mask; |
| 132 | }; | 133 | }; |
| 133 | |||
| 134 | size_t AstcFormatIndex(VideoCore::Surface::PixelFormat format) { | ||
| 135 | switch (format) { | ||
| 136 | case VideoCore::Surface::PixelFormat::ASTC_2D_4X4_SRGB: | ||
| 137 | case VideoCore::Surface::PixelFormat::ASTC_2D_4X4_UNORM: | ||
| 138 | return 0; | ||
| 139 | case VideoCore::Surface::PixelFormat::ASTC_2D_5X4_SRGB: | ||
| 140 | case VideoCore::Surface::PixelFormat::ASTC_2D_5X4_UNORM: | ||
| 141 | return 1; | ||
| 142 | case VideoCore::Surface::PixelFormat::ASTC_2D_5X5_SRGB: | ||
| 143 | case VideoCore::Surface::PixelFormat::ASTC_2D_5X5_UNORM: | ||
| 144 | return 2; | ||
| 145 | case VideoCore::Surface::PixelFormat::ASTC_2D_6X5_SRGB: | ||
| 146 | case VideoCore::Surface::PixelFormat::ASTC_2D_6X5_UNORM: | ||
| 147 | return 3; | ||
| 148 | case VideoCore::Surface::PixelFormat::ASTC_2D_6X6_SRGB: | ||
| 149 | case VideoCore::Surface::PixelFormat::ASTC_2D_6X6_UNORM: | ||
| 150 | return 4; | ||
| 151 | case VideoCore::Surface::PixelFormat::ASTC_2D_8X5_SRGB: | ||
| 152 | case VideoCore::Surface::PixelFormat::ASTC_2D_8X5_UNORM: | ||
| 153 | return 5; | ||
| 154 | case VideoCore::Surface::PixelFormat::ASTC_2D_8X6_SRGB: | ||
| 155 | case VideoCore::Surface::PixelFormat::ASTC_2D_8X6_UNORM: | ||
| 156 | return 6; | ||
| 157 | case VideoCore::Surface::PixelFormat::ASTC_2D_8X8_SRGB: | ||
| 158 | case VideoCore::Surface::PixelFormat::ASTC_2D_8X8_UNORM: | ||
| 159 | return 7; | ||
| 160 | case VideoCore::Surface::PixelFormat::ASTC_2D_10X5_SRGB: | ||
| 161 | case VideoCore::Surface::PixelFormat::ASTC_2D_10X5_UNORM: | ||
| 162 | return 8; | ||
| 163 | case VideoCore::Surface::PixelFormat::ASTC_2D_10X6_SRGB: | ||
| 164 | case VideoCore::Surface::PixelFormat::ASTC_2D_10X6_UNORM: | ||
| 165 | return 9; | ||
| 166 | case VideoCore::Surface::PixelFormat::ASTC_2D_10X8_SRGB: | ||
| 167 | case VideoCore::Surface::PixelFormat::ASTC_2D_10X8_UNORM: | ||
| 168 | return 10; | ||
| 169 | case VideoCore::Surface::PixelFormat::ASTC_2D_10X10_SRGB: | ||
| 170 | case VideoCore::Surface::PixelFormat::ASTC_2D_10X10_UNORM: | ||
| 171 | return 11; | ||
| 172 | case VideoCore::Surface::PixelFormat::ASTC_2D_12X10_SRGB: | ||
| 173 | case VideoCore::Surface::PixelFormat::ASTC_2D_12X10_UNORM: | ||
| 174 | return 12; | ||
| 175 | case VideoCore::Surface::PixelFormat::ASTC_2D_12X12_SRGB: | ||
| 176 | case VideoCore::Surface::PixelFormat::ASTC_2D_12X12_UNORM: | ||
| 177 | return 13; | ||
| 178 | default: | ||
| 179 | UNREACHABLE(); | ||
| 180 | return 0; | ||
| 181 | } | ||
| 182 | } | ||
| 183 | } // Anonymous namespace | 134 | } // Anonymous namespace |
| 184 | 135 | ||
| 185 | ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, | 136 | ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, |
| @@ -361,53 +312,19 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, | |||
| 361 | COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV), | 312 | COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV), |
| 362 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, | 313 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, |
| 363 | compute_pass_descriptor_queue{compute_pass_descriptor_queue_}, memory_allocator{ | 314 | compute_pass_descriptor_queue{compute_pass_descriptor_queue_}, memory_allocator{ |
| 364 | memory_allocator_} { | 315 | memory_allocator_} {} |
| 365 | // These must match the order found in AstcFormatIndex | ||
| 366 | static constexpr std::array<std::span<const u32>, 14> ASTC_SHADERS{ | ||
| 367 | ASTC_DECODER_COMP_4X4_SPV, ASTC_DECODER_COMP_5X4_SPV, ASTC_DECODER_COMP_5X5_SPV, | ||
| 368 | ASTC_DECODER_COMP_6X5_SPV, ASTC_DECODER_COMP_6X6_SPV, ASTC_DECODER_COMP_8X5_SPV, | ||
| 369 | ASTC_DECODER_COMP_8X6_SPV, ASTC_DECODER_COMP_8X8_SPV, ASTC_DECODER_COMP_10X5_SPV, | ||
| 370 | ASTC_DECODER_COMP_10X6_SPV, ASTC_DECODER_COMP_10X8_SPV, ASTC_DECODER_COMP_10X10_SPV, | ||
| 371 | ASTC_DECODER_COMP_12X10_SPV, ASTC_DECODER_COMP_12X12_SPV, | ||
| 372 | }; | ||
| 373 | for (size_t index = 0; index < ASTC_SHADERS.size(); ++index) { | ||
| 374 | const auto& code = ASTC_SHADERS[index]; | ||
| 375 | const auto module_ = device.GetLogical().CreateShaderModule({ | ||
| 376 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, | ||
| 377 | .pNext = nullptr, | ||
| 378 | .flags = 0, | ||
| 379 | .codeSize = static_cast<u32>(code.size_bytes()), | ||
| 380 | .pCode = code.data(), | ||
| 381 | }); | ||
| 382 | device.SaveShader(code); | ||
| 383 | astc_pipelines[index] = device.GetLogical().CreateComputePipeline({ | ||
| 384 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | ||
| 385 | .pNext = nullptr, | ||
| 386 | .flags = 0, | ||
| 387 | .stage{ | ||
| 388 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | ||
| 389 | .pNext = nullptr, | ||
| 390 | .flags = 0, | ||
| 391 | .stage = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 392 | .module = *module_, | ||
| 393 | .pName = "main", | ||
| 394 | .pSpecializationInfo = nullptr, | ||
| 395 | }, | ||
| 396 | .layout = *layout, | ||
| 397 | .basePipelineHandle = nullptr, | ||
| 398 | .basePipelineIndex = 0, | ||
| 399 | }); | ||
| 400 | } | ||
| 401 | } | ||
| 402 | 316 | ||
| 403 | ASTCDecoderPass::~ASTCDecoderPass() = default; | 317 | ASTCDecoderPass::~ASTCDecoderPass() = default; |
| 404 | 318 | ||
| 405 | void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | 319 | void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, |
| 406 | std::span<const VideoCommon::SwizzleParameters> swizzles) { | 320 | std::span<const VideoCommon::SwizzleParameters> swizzles) { |
| 407 | using namespace VideoCommon::Accelerated; | 321 | using namespace VideoCommon::Accelerated; |
| 408 | 322 | const std::array<u32, 2> block_dims{ | |
| 323 | VideoCore::Surface::DefaultBlockWidth(image.info.format), | ||
| 324 | VideoCore::Surface::DefaultBlockHeight(image.info.format), | ||
| 325 | }; | ||
| 409 | scheduler.RequestOutsideRenderPassOperationContext(); | 326 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 410 | const VkPipeline vk_pipeline = *astc_pipelines[AstcFormatIndex(image.info.format)]; | 327 | const VkPipeline vk_pipeline = *pipeline; |
| 411 | const VkImageAspectFlags aspect_mask = image.AspectMask(); | 328 | const VkImageAspectFlags aspect_mask = image.AspectMask(); |
| 412 | const VkImage vk_image = image.Handle(); | 329 | const VkImage vk_image = image.Handle(); |
| 413 | const bool is_initialized = image.ExchangeInitialization(); | 330 | const bool is_initialized = image.ExchangeInitialization(); |
| @@ -454,9 +371,10 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | |||
| 454 | ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); | 371 | ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); |
| 455 | ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); | 372 | ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); |
| 456 | ASSERT(params.bytes_per_block_log2 == 4); | 373 | ASSERT(params.bytes_per_block_log2 == 4); |
| 457 | scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, params, | 374 | scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims, |
| 458 | descriptor_data](vk::CommandBuffer cmdbuf) { | 375 | params, descriptor_data](vk::CommandBuffer cmdbuf) { |
| 459 | const AstcPushConstants uniforms{ | 376 | const AstcPushConstants uniforms{ |
| 377 | .blocks_dims = block_dims, | ||
| 460 | .layer_stride = params.layer_stride, | 378 | .layer_stride = params.layer_stride, |
| 461 | .block_size = params.block_size, | 379 | .block_size = params.block_size, |
| 462 | .x_shift = params.x_shift, | 380 | .x_shift = params.x_shift, |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 1f264bea6..dd3927376 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
| @@ -95,8 +95,6 @@ public: | |||
| 95 | std::span<const VideoCommon::SwizzleParameters> swizzles); | 95 | std::span<const VideoCommon::SwizzleParameters> swizzles); |
| 96 | 96 | ||
| 97 | private: | 97 | private: |
| 98 | std::array<vk::Pipeline, 14> astc_pipelines; | ||
| 99 | |||
| 100 | Scheduler& scheduler; | 98 | Scheduler& scheduler; |
| 101 | StagingBufferPool& staging_buffer_pool; | 99 | StagingBufferPool& staging_buffer_pool; |
| 102 | ComputePassDescriptorQueue& compute_pass_descriptor_queue; | 100 | ComputePassDescriptorQueue& compute_pass_descriptor_queue; |