summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Ameer J2023-08-02 18:15:59 -0400
committerGravatar Ameer J2023-08-06 14:54:58 -0400
commit8ce158bce6912b2263f1724e6c09d8b517ef18c3 (patch)
treeded2c5ccef3c5def9b4ce03b510c5da8e5ea48a9
parentvulkan dims specialization (diff)
downloadyuzu-8ce158bce6912b2263f1724e6c09d8b517ef18c3.tar.gz
yuzu-8ce158bce6912b2263f1724e6c09d8b517ef18c3.tar.xz
yuzu-8ce158bce6912b2263f1724e6c09d8b517ef18c3.zip
Revert "vulkan dims specialization"
This reverts commit e6243058f2269bd79ac8479d58e55feec2611e9d.
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt57
-rw-r--r--src/video_core/host_shaders/astc_decoder.comp42
-rw-r--r--src/video_core/host_shaders/astc_decoder_spv_includes.h20
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.cpp104
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.h2
5 files changed, 27 insertions, 198 deletions
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 20e8388ee..e61d9af80 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -13,11 +13,6 @@ set(GLSL_INCLUDES
13 ${FIDELITYFX_FILES} 13 ${FIDELITYFX_FILES}
14) 14)
15 15
16set(ASTC_INCLUDES
17 # astc_decoder_glsl_includes.h
18 astc_decoder_spv_includes.h
19)
20
21set(SHADER_FILES 16set(SHADER_FILES
22 astc_decoder.comp 17 astc_decoder.comp
23 blit_color_float.frag 18 blit_color_float.frag
@@ -100,60 +95,9 @@ if (NOT GLSLANG_ERROR STREQUAL "")
100 set(QUIET_FLAG "") 95 set(QUIET_FLAG "")
101endif() 96endif()
102 97
103macro(ASTC_GEN)
104 # paired list of valid astc block dimensions
105 set(ASTC_WIDTHS 4 5 5 6 6 6 8 8 8 10 10 10 10 12 12)
106 set(ASTC_HEIGHTS 4 4 5 4 5 6 5 6 8 5 6 8 10 10 12)
107 list(LENGTH ASTC_WIDTHS NUM_ASTC_FORMATS)
108 math(EXPR NUM_ASTC_FORMATS "${NUM_ASTC_FORMATS}-1")
109 foreach(i RANGE ${NUM_ASTC_FORMATS})
110 list(GET ASTC_WIDTHS ${i} ASTC_WIDTH)
111 list(GET ASTC_HEIGHTS ${i} ASTC_HEIGHT)
112
113 # Vulkan SPIR-V Specialization
114
115 string(TOUPPER ${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}_SPV SPIRV_VARIABLE_NAME)
116 set(SPIRV_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}_spv.h)
117 add_custom_command(
118 OUTPUT
119 ${SPIRV_HEADER_FILE}
120 COMMAND
121 ${GLSLANGVALIDATOR} -V -DBLOCK_WIDTH=${ASTC_WIDTH} -DBLOCK_HEIGHT=${ASTC_HEIGHT} ${QUIET_FLAG} ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE}
122 MAIN_DEPENDENCY
123 ${SOURCE_FILE}
124 )
125 set(SHADER_HEADERS ${SHADER_HEADERS} ${SPIRV_HEADER_FILE})
126
127 # GLSL Specialization
128 # Disabled as there was no noticeable performance uplift specializing the shaders for OGL
129
130 # set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}.h)
131 # set(SHADER_DEFINES "#define BLOCK_WIDTH ${ASTC_WIDTH}" "#define BLOCK_HEIGHT ${ASTC_HEIGHT}")
132 # set(DEFINES_LINE_NUMBER 14)
133 # string(TOUPPER ${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT} GLSL_VARIABLE_NAME)
134 # add_custom_command(
135 # OUTPUT
136 # ${SOURCE_HEADER_FILE}
137 # COMMAND
138 # ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE} "${SHADER_DEFINES}" ${DEFINES_LINE_NUMBER} ${GLSL_VARIABLE_NAME}
139 # MAIN_DEPENDENCY
140 # ${SOURCE_FILE}
141 # DEPENDS
142 # ${INPUT_FILE}
143 # ${SOURCE_FILE}
144 # )
145 # set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE})
146 endforeach()
147endmacro()
148
149foreach(FILENAME IN ITEMS ${SHADER_FILES}) 98foreach(FILENAME IN ITEMS ${SHADER_FILES})
150 string(REPLACE "." "_" SHADER_NAME ${FILENAME}) 99 string(REPLACE "." "_" SHADER_NAME ${FILENAME})
151 set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) 100 set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME})
152
153 if (${FILENAME} MATCHES "astc_decoder.comp")
154 ASTC_GEN()
155 endif()
156
157 # Skip generating source headers on Vulkan exclusive files 101 # Skip generating source headers on Vulkan exclusive files
158 if (NOT ${FILENAME} MATCHES "vulkan.*") 102 if (NOT ${FILENAME} MATCHES "vulkan.*")
159 set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) 103 set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h)
@@ -207,7 +151,6 @@ endforeach()
207 151
208set(SHADER_SOURCES ${SHADER_FILES}) 152set(SHADER_SOURCES ${SHADER_FILES})
209list(APPEND SHADER_SOURCES ${GLSL_INCLUDES}) 153list(APPEND SHADER_SOURCES ${GLSL_INCLUDES})
210list(APPEND SHADER_SOURCES ${ASTC_INCLUDES})
211 154
212add_custom_target(host_shaders 155add_custom_target(host_shaders
213 DEPENDS 156 DEPENDS
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index b4bb8299f..a33c916ac 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -24,9 +24,7 @@
24layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; 24layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
25 25
26BEGIN_PUSH_CONSTANTS 26BEGIN_PUSH_CONSTANTS
27#ifndef BLOCK_WIDTH
28UNIFORM(1) uvec2 block_dims; 27UNIFORM(1) uvec2 block_dims;
29#endif
30UNIFORM(2) uint layer_stride; 28UNIFORM(2) uint layer_stride;
31UNIFORM(3) uint block_size; 29UNIFORM(3) uint block_size;
32UNIFORM(4) uint x_shift; 30UNIFORM(4) uint x_shift;
@@ -77,15 +75,7 @@ int color_bitsread = 0;
77// At most will require BLOCK_WIDTH x BLOCK_HEIGHT x 2 in dual plane mode 75// At most will require BLOCK_WIDTH x BLOCK_HEIGHT x 2 in dual plane mode
78// So the maximum would be 144 (12 x 12) elements, x 2 for two planes 76// So the maximum would be 144 (12 x 12) elements, x 2 for two planes
79#define DIVCEIL(number, divisor) (number + divisor - 1) / divisor 77#define DIVCEIL(number, divisor) (number + divisor - 1) / divisor
80
81#ifndef BLOCK_WIDTH
82#define BLOCK_WIDTH block_dims.x
83#define BLOCK_HEIGHT block_dims.y
84#define ARRAY_NUM_ELEMENTS 144 78#define ARRAY_NUM_ELEMENTS 144
85#else
86#define ARRAY_NUM_ELEMENTS BLOCK_WIDTH * BLOCK_HEIGHT
87#endif
88
89#define VECTOR_ARRAY_SIZE DIVCEIL(ARRAY_NUM_ELEMENTS * 2, 4) 79#define VECTOR_ARRAY_SIZE DIVCEIL(ARRAY_NUM_ELEMENTS * 2, 4)
90uvec4 result_vector[VECTOR_ARRAY_SIZE]; 80uvec4 result_vector[VECTOR_ARRAY_SIZE];
91 81
@@ -275,7 +265,7 @@ uint Hash52(uint p) {
275} 265}
276 266
277uint Select2DPartition(uint seed, uint x, uint y, uint partition_count) { 267uint Select2DPartition(uint seed, uint x, uint y, uint partition_count) {
278 if ((BLOCK_WIDTH * BLOCK_HEIGHT) < 32) { 268 if ((block_dims.y * block_dims.x) < 32) {
279 x <<= 1; 269 x <<= 1;
280 y <<= 1; 270 y <<= 1;
281 } 271 }
@@ -888,8 +878,8 @@ uint UnquantizeTexelWeight(EncodingData val) {
888uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE]; 878uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE];
889 879
890void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { 880void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
891 const uint Ds = uint((BLOCK_WIDTH * 0.5f + 1024) / (BLOCK_WIDTH - 1)); 881 const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1));
892 const uint Dt = uint((BLOCK_HEIGHT * 0.5f + 1024) / (BLOCK_HEIGHT - 1)); 882 const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1));
893 const uint num_planes = is_dual_plane ? 2 : 1; 883 const uint num_planes = is_dual_plane ? 2 : 1;
894 const uint area = size.x * size.y; 884 const uint area = size.x * size.y;
895 const uint loop_count = min(result_index, area * num_planes); 885 const uint loop_count = min(result_index, area * num_planes);
@@ -900,8 +890,8 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
900 UnquantizeTexelWeight(GetEncodingFromVector(itr)); 890 UnquantizeTexelWeight(GetEncodingFromVector(itr));
901 } 891 }
902 for (uint plane = 0; plane < num_planes; ++plane) { 892 for (uint plane = 0; plane < num_planes; ++plane) {
903 for (uint t = 0; t < BLOCK_HEIGHT; t++) { 893 for (uint t = 0; t < block_dims.y; t++) {
904 for (uint s = 0; s < BLOCK_WIDTH; s++) { 894 for (uint s = 0; s < block_dims.x; s++) {
905 const uint cs = Ds * s; 895 const uint cs = Ds * s;
906 const uint ct = Dt * t; 896 const uint ct = Dt * t;
907 const uint gs = (cs * (size.x - 1) + 32) >> 6; 897 const uint gs = (cs * (size.x - 1) + 32) >> 6;
@@ -944,7 +934,7 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
944 VectorIndicesFromBase(offset_base); 934 VectorIndicesFromBase(offset_base);
945 p.w = result_vector[array_index][vector_index]; 935 p.w = result_vector[array_index][vector_index];
946 } 936 }
947 const uint offset = (t * BLOCK_WIDTH + s) + ARRAY_NUM_ELEMENTS * plane; 937 const uint offset = (t * block_dims.x + s) + ARRAY_NUM_ELEMENTS * plane;
948 const uint array_index = offset / 4; 938 const uint array_index = offset / 4;
949 const uint vector_index = offset % 4; 939 const uint vector_index = offset % 4;
950 unquantized_texel_weights[array_index][vector_index] = (uint(dot(p, w)) + 8) >> 4; 940 unquantized_texel_weights[array_index][vector_index] = (uint(dot(p, w)) + 8) >> 4;
@@ -986,8 +976,8 @@ int FindLayout(uint mode) {
986 976
987 977
988void FillError(ivec3 coord) { 978void FillError(ivec3 coord) {
989 for (uint j = 0; j < BLOCK_HEIGHT; j++) { 979 for (uint j = 0; j < block_dims.y; j++) {
990 for (uint i = 0; i < BLOCK_WIDTH; i++) { 980 for (uint i = 0; i < block_dims.x; i++) {
991 imageStore(dest_image, coord + ivec3(i, j, 0), vec4(0.0, 0.0, 0.0, 0.0)); 981 imageStore(dest_image, coord + ivec3(i, j, 0), vec4(0.0, 0.0, 0.0, 0.0));
992 } 982 }
993 } 983 }
@@ -1003,8 +993,8 @@ void FillVoidExtentLDR(ivec3 coord) {
1003 const float r = float(r_u) / 65535.0f; 993 const float r = float(r_u) / 65535.0f;
1004 const float g = float(g_u) / 65535.0f; 994 const float g = float(g_u) / 65535.0f;
1005 const float b = float(b_u) / 65535.0f; 995 const float b = float(b_u) / 65535.0f;
1006 for (uint j = 0; j < BLOCK_HEIGHT; j++) { 996 for (uint j = 0; j < block_dims.y; j++) {
1007 for (uint i = 0; i < BLOCK_WIDTH; i++) { 997 for (uint i = 0; i < block_dims.x; i++) {
1008 imageStore(dest_image, coord + ivec3(i, j, 0), vec4(r, g, b, a)); 998 imageStore(dest_image, coord + ivec3(i, j, 0), vec4(r, g, b, a));
1009 } 999 }
1010 } 1000 }
@@ -1099,7 +1089,7 @@ void DecompressBlock(ivec3 coord) {
1099 return; 1089 return;
1100 } 1090 }
1101 const uvec2 size_params = DecodeBlockSize(mode); 1091 const uvec2 size_params = DecodeBlockSize(mode);
1102 if ((size_params.x > BLOCK_WIDTH) || (size_params.y > BLOCK_HEIGHT)) { 1092 if ((size_params.x > block_dims.x) || (size_params.y > block_dims.y)) {
1103 FillError(coord); 1093 FillError(coord);
1104 return; 1094 return;
1105 } 1095 }
@@ -1228,21 +1218,21 @@ void DecompressBlock(ivec3 coord) {
1228 DecodeIntegerSequence(max_weight, GetNumWeightValues(size_params, dual_plane)); 1218 DecodeIntegerSequence(max_weight, GetNumWeightValues(size_params, dual_plane));
1229 1219
1230 UnquantizeTexelWeights(size_params, dual_plane); 1220 UnquantizeTexelWeights(size_params, dual_plane);
1231 for (uint j = 0; j < BLOCK_HEIGHT; j++) { 1221 for (uint j = 0; j < block_dims.y; j++) {
1232 for (uint i = 0; i < BLOCK_WIDTH; i++) { 1222 for (uint i = 0; i < block_dims.x; i++) {
1233 uint local_partition = 0; 1223 uint local_partition = 0;
1234 if (num_partitions > 1) { 1224 if (num_partitions > 1) {
1235 local_partition = Select2DPartition(partition_index, i, j, num_partitions); 1225 local_partition = Select2DPartition(partition_index, i, j, num_partitions);
1236 } 1226 }
1237 const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]); 1227 const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]);
1238 const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]); 1228 const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]);
1239 const uint weight_offset = (j * BLOCK_WIDTH + i); 1229 const uint weight_offset = (j * block_dims.x + i);
1240 const uint array_index = weight_offset / 4; 1230 const uint array_index = weight_offset / 4;
1241 const uint vector_index = weight_offset % 4; 1231 const uint vector_index = weight_offset % 4;
1242 const uint primary_weight = unquantized_texel_weights[array_index][vector_index]; 1232 const uint primary_weight = unquantized_texel_weights[array_index][vector_index];
1243 uvec4 weight_vec = uvec4(primary_weight); 1233 uvec4 weight_vec = uvec4(primary_weight);
1244 if (dual_plane) { 1234 if (dual_plane) {
1245 const uint secondary_weight_offset = (j * BLOCK_WIDTH + i) + ARRAY_NUM_ELEMENTS; 1235 const uint secondary_weight_offset = (j * block_dims.x + i) + ARRAY_NUM_ELEMENTS;
1246 const uint secondary_array_index = secondary_weight_offset / 4; 1236 const uint secondary_array_index = secondary_weight_offset / 4;
1247 const uint secondary_vector_index = secondary_weight_offset % 4; 1237 const uint secondary_vector_index = secondary_weight_offset % 4;
1248 const uint secondary_weight = 1238 const uint secondary_weight =
@@ -1280,7 +1270,7 @@ void main() {
1280 offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; 1270 offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift;
1281 offset += swizzle; 1271 offset += swizzle;
1282 1272
1283 const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(BLOCK_WIDTH, BLOCK_HEIGHT, 1)); 1273 const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1));
1284 if (any(greaterThanEqual(coord, imageSize(dest_image)))) { 1274 if (any(greaterThanEqual(coord, imageSize(dest_image)))) {
1285 return; 1275 return;
1286 } 1276 }
diff --git a/src/video_core/host_shaders/astc_decoder_spv_includes.h b/src/video_core/host_shaders/astc_decoder_spv_includes.h
deleted file mode 100644
index 44ee50c5f..000000000
--- a/src/video_core/host_shaders/astc_decoder_spv_includes.h
+++ /dev/null
@@ -1,20 +0,0 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include "video_core/host_shaders/astc_decoder_comp_10x10_spv.h"
7#include "video_core/host_shaders/astc_decoder_comp_10x5_spv.h"
8#include "video_core/host_shaders/astc_decoder_comp_10x6_spv.h"
9#include "video_core/host_shaders/astc_decoder_comp_10x8_spv.h"
10#include "video_core/host_shaders/astc_decoder_comp_12x10_spv.h"
11#include "video_core/host_shaders/astc_decoder_comp_12x12_spv.h"
12#include "video_core/host_shaders/astc_decoder_comp_4x4_spv.h"
13#include "video_core/host_shaders/astc_decoder_comp_5x4_spv.h"
14#include "video_core/host_shaders/astc_decoder_comp_5x5_spv.h"
15#include "video_core/host_shaders/astc_decoder_comp_6x5_spv.h"
16#include "video_core/host_shaders/astc_decoder_comp_6x6_spv.h"
17#include "video_core/host_shaders/astc_decoder_comp_8x5_spv.h"
18#include "video_core/host_shaders/astc_decoder_comp_8x6_spv.h"
19#include "video_core/host_shaders/astc_decoder_comp_8x8_spv.h"
20#include "video_core/host_shaders/astc_decoder_comp_spv.h"
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index f22342252..54ee030ce 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -11,7 +11,7 @@
11#include "common/assert.h" 11#include "common/assert.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "common/div_ceil.h" 13#include "common/div_ceil.h"
14#include "video_core/host_shaders/astc_decoder_spv_includes.h" 14#include "video_core/host_shaders/astc_decoder_comp_spv.h"
15#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" 15#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
16#include "video_core/host_shaders/vulkan_uint8_comp_spv.h" 16#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
17#include "video_core/renderer_vulkan/vk_compute_pass.h" 17#include "video_core/renderer_vulkan/vk_compute_pass.h"
@@ -124,62 +124,13 @@ constexpr std::array<VkDescriptorUpdateTemplateEntry, ASTC_NUM_BINDINGS>
124 }}; 124 }};
125 125
126struct AstcPushConstants { 126struct AstcPushConstants {
127 std::array<u32, 2> blocks_dims;
127 u32 layer_stride; 128 u32 layer_stride;
128 u32 block_size; 129 u32 block_size;
129 u32 x_shift; 130 u32 x_shift;
130 u32 block_height; 131 u32 block_height;
131 u32 block_height_mask; 132 u32 block_height_mask;
132}; 133};
133
134size_t AstcFormatIndex(VideoCore::Surface::PixelFormat format) {
135 switch (format) {
136 case VideoCore::Surface::PixelFormat::ASTC_2D_4X4_SRGB:
137 case VideoCore::Surface::PixelFormat::ASTC_2D_4X4_UNORM:
138 return 0;
139 case VideoCore::Surface::PixelFormat::ASTC_2D_5X4_SRGB:
140 case VideoCore::Surface::PixelFormat::ASTC_2D_5X4_UNORM:
141 return 1;
142 case VideoCore::Surface::PixelFormat::ASTC_2D_5X5_SRGB:
143 case VideoCore::Surface::PixelFormat::ASTC_2D_5X5_UNORM:
144 return 2;
145 case VideoCore::Surface::PixelFormat::ASTC_2D_6X5_SRGB:
146 case VideoCore::Surface::PixelFormat::ASTC_2D_6X5_UNORM:
147 return 3;
148 case VideoCore::Surface::PixelFormat::ASTC_2D_6X6_SRGB:
149 case VideoCore::Surface::PixelFormat::ASTC_2D_6X6_UNORM:
150 return 4;
151 case VideoCore::Surface::PixelFormat::ASTC_2D_8X5_SRGB:
152 case VideoCore::Surface::PixelFormat::ASTC_2D_8X5_UNORM:
153 return 5;
154 case VideoCore::Surface::PixelFormat::ASTC_2D_8X6_SRGB:
155 case VideoCore::Surface::PixelFormat::ASTC_2D_8X6_UNORM:
156 return 6;
157 case VideoCore::Surface::PixelFormat::ASTC_2D_8X8_SRGB:
158 case VideoCore::Surface::PixelFormat::ASTC_2D_8X8_UNORM:
159 return 7;
160 case VideoCore::Surface::PixelFormat::ASTC_2D_10X5_SRGB:
161 case VideoCore::Surface::PixelFormat::ASTC_2D_10X5_UNORM:
162 return 8;
163 case VideoCore::Surface::PixelFormat::ASTC_2D_10X6_SRGB:
164 case VideoCore::Surface::PixelFormat::ASTC_2D_10X6_UNORM:
165 return 9;
166 case VideoCore::Surface::PixelFormat::ASTC_2D_10X8_SRGB:
167 case VideoCore::Surface::PixelFormat::ASTC_2D_10X8_UNORM:
168 return 10;
169 case VideoCore::Surface::PixelFormat::ASTC_2D_10X10_SRGB:
170 case VideoCore::Surface::PixelFormat::ASTC_2D_10X10_UNORM:
171 return 11;
172 case VideoCore::Surface::PixelFormat::ASTC_2D_12X10_SRGB:
173 case VideoCore::Surface::PixelFormat::ASTC_2D_12X10_UNORM:
174 return 12;
175 case VideoCore::Surface::PixelFormat::ASTC_2D_12X12_SRGB:
176 case VideoCore::Surface::PixelFormat::ASTC_2D_12X12_UNORM:
177 return 13;
178 default:
179 UNREACHABLE();
180 return 0;
181 }
182}
183} // Anonymous namespace 134} // Anonymous namespace
184 135
185ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, 136ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool,
@@ -361,53 +312,19 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_,
361 COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV), 312 COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV),
362 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, 313 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
363 compute_pass_descriptor_queue{compute_pass_descriptor_queue_}, memory_allocator{ 314 compute_pass_descriptor_queue{compute_pass_descriptor_queue_}, memory_allocator{
364 memory_allocator_} { 315 memory_allocator_} {}
365 // These must match the order found in AstcFormatIndex
366 static constexpr std::array<std::span<const u32>, 14> ASTC_SHADERS{
367 ASTC_DECODER_COMP_4X4_SPV, ASTC_DECODER_COMP_5X4_SPV, ASTC_DECODER_COMP_5X5_SPV,
368 ASTC_DECODER_COMP_6X5_SPV, ASTC_DECODER_COMP_6X6_SPV, ASTC_DECODER_COMP_8X5_SPV,
369 ASTC_DECODER_COMP_8X6_SPV, ASTC_DECODER_COMP_8X8_SPV, ASTC_DECODER_COMP_10X5_SPV,
370 ASTC_DECODER_COMP_10X6_SPV, ASTC_DECODER_COMP_10X8_SPV, ASTC_DECODER_COMP_10X10_SPV,
371 ASTC_DECODER_COMP_12X10_SPV, ASTC_DECODER_COMP_12X12_SPV,
372 };
373 for (size_t index = 0; index < ASTC_SHADERS.size(); ++index) {
374 const auto& code = ASTC_SHADERS[index];
375 const auto module_ = device.GetLogical().CreateShaderModule({
376 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
377 .pNext = nullptr,
378 .flags = 0,
379 .codeSize = static_cast<u32>(code.size_bytes()),
380 .pCode = code.data(),
381 });
382 device.SaveShader(code);
383 astc_pipelines[index] = device.GetLogical().CreateComputePipeline({
384 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
385 .pNext = nullptr,
386 .flags = 0,
387 .stage{
388 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
389 .pNext = nullptr,
390 .flags = 0,
391 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
392 .module = *module_,
393 .pName = "main",
394 .pSpecializationInfo = nullptr,
395 },
396 .layout = *layout,
397 .basePipelineHandle = nullptr,
398 .basePipelineIndex = 0,
399 });
400 }
401}
402 316
403ASTCDecoderPass::~ASTCDecoderPass() = default; 317ASTCDecoderPass::~ASTCDecoderPass() = default;
404 318
405void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, 319void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
406 std::span<const VideoCommon::SwizzleParameters> swizzles) { 320 std::span<const VideoCommon::SwizzleParameters> swizzles) {
407 using namespace VideoCommon::Accelerated; 321 using namespace VideoCommon::Accelerated;
408 322 const std::array<u32, 2> block_dims{
323 VideoCore::Surface::DefaultBlockWidth(image.info.format),
324 VideoCore::Surface::DefaultBlockHeight(image.info.format),
325 };
409 scheduler.RequestOutsideRenderPassOperationContext(); 326 scheduler.RequestOutsideRenderPassOperationContext();
410 const VkPipeline vk_pipeline = *astc_pipelines[AstcFormatIndex(image.info.format)]; 327 const VkPipeline vk_pipeline = *pipeline;
411 const VkImageAspectFlags aspect_mask = image.AspectMask(); 328 const VkImageAspectFlags aspect_mask = image.AspectMask();
412 const VkImage vk_image = image.Handle(); 329 const VkImage vk_image = image.Handle();
413 const bool is_initialized = image.ExchangeInitialization(); 330 const bool is_initialized = image.ExchangeInitialization();
@@ -454,9 +371,10 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
454 ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); 371 ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
455 ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); 372 ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
456 ASSERT(params.bytes_per_block_log2 == 4); 373 ASSERT(params.bytes_per_block_log2 == 4);
457 scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, params, 374 scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims,
458 descriptor_data](vk::CommandBuffer cmdbuf) { 375 params, descriptor_data](vk::CommandBuffer cmdbuf) {
459 const AstcPushConstants uniforms{ 376 const AstcPushConstants uniforms{
377 .blocks_dims = block_dims,
460 .layer_stride = params.layer_stride, 378 .layer_stride = params.layer_stride,
461 .block_size = params.block_size, 379 .block_size = params.block_size,
462 .x_shift = params.x_shift, 380 .x_shift = params.x_shift,
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 1f264bea6..dd3927376 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -95,8 +95,6 @@ public:
95 std::span<const VideoCommon::SwizzleParameters> swizzles); 95 std::span<const VideoCommon::SwizzleParameters> swizzles);
96 96
97private: 97private:
98 std::array<vk::Pipeline, 14> astc_pipelines;
99
100 Scheduler& scheduler; 98 Scheduler& scheduler;
101 StagingBufferPool& staging_buffer_pool; 99 StagingBufferPool& staging_buffer_pool;
102 ComputePassDescriptorQueue& compute_pass_descriptor_queue; 100 ComputePassDescriptorQueue& compute_pass_descriptor_queue;