summaryrefslogtreecommitdiff
path: root/src/video_core/host_shaders
diff options
context:
space:
mode:
author: ameerj, 2023-06-28 01:24:52 -0400
committer: Ameer J, 2023-08-06 14:54:58 -0400
commit: 5a78b35b1abf071bd62b1ff8d0cb939bd30a549f (patch)
tree: 181d40f05679ed3ff2c9711306151886383f71a6 /src/video_core/host_shaders
parent: small_block opt (diff)
download: yuzu-5a78b35b1abf071bd62b1ff8d0cb939bd30a549f.tar.gz
yuzu-5a78b35b1abf071bd62b1ff8d0cb939bd30a549f.tar.xz
yuzu-5a78b35b1abf071bd62b1ff8d0cb939bd30a549f.zip
vulkan dims specialization
Diffstat (limited to 'src/video_core/host_shaders')
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt57
-rw-r--r--src/video_core/host_shaders/astc_decoder.comp42
-rw-r--r--src/video_core/host_shaders/astc_decoder_spv_includes.h20
3 files changed, 103 insertions, 16 deletions
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index e61d9af80..20e8388ee 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -13,6 +13,11 @@ set(GLSL_INCLUDES
13 ${FIDELITYFX_FILES} 13 ${FIDELITYFX_FILES}
14) 14)
15 15
# Hand-written headers that pull in the generated ASTC decoder shader headers.
# Only the SPIR-V aggregate header is listed; the GLSL one stays commented out.
set(ASTC_INCLUDES
    # astc_decoder_glsl_includes.h
    astc_decoder_spv_includes.h
)
20
16set(SHADER_FILES 21set(SHADER_FILES
17 astc_decoder.comp 22 astc_decoder.comp
18 blit_color_float.frag 23 blit_color_float.frag
@@ -95,9 +100,60 @@ if (NOT GLSLANG_ERROR STREQUAL "")
95 set(QUIET_FLAG "") 100 set(QUIET_FLAG "")
96endif() 101endif()
97 102
# ASTC_GEN()
#
# Registers one SPIR-V header generation rule per ASTC block size for the
# shader the caller is currently processing. Each rule compiles the shader
# with BLOCK_WIDTH / BLOCK_HEIGHT predefined, so the block dimensions are
# compile-time constants in the generated variant.
#
# Reads from the calling scope:
#   SHADER_NAME, SHADER_DIR, SOURCE_FILE, GLSLANGVALIDATOR, QUIET_FLAG, GLSL_FLAGS
# Updates in the calling scope:
#   SHADER_HEADERS - every generated header path is appended to it.
#
# This stays a macro so that SHADER_HEADERS is modified directly in the
# caller's scope.
macro(ASTC_GEN)
    # Paired lists: entry i of ASTC_WIDTHS belongs with entry i of ASTC_HEIGHTS.
    # These are the 14 ASTC 2D block footprints. 6x4 is deliberately absent:
    # it is not an ASTC footprint, and astc_decoder_spv_includes.h has no
    # 6x4 header to consume it.
    set(ASTC_WIDTHS  4 5 5 6 6 8 8 8 10 10 10 10 12 12)
    set(ASTC_HEIGHTS 4 4 5 5 6 5 6 8  5  6  8 10 10 12)
    list(LENGTH ASTC_WIDTHS NUM_ASTC_FORMATS)
    # foreach(RANGE n) includes n itself, so stop at the last valid index.
    math(EXPR NUM_ASTC_FORMATS "${NUM_ASTC_FORMATS} - 1")
    foreach(i RANGE ${NUM_ASTC_FORMATS})
        list(GET ASTC_WIDTHS ${i} ASTC_WIDTH)
        list(GET ASTC_HEIGHTS ${i} ASTC_HEIGHT)

        # Vulkan SPIR-V Specialization

        string(TOUPPER "${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}_SPV" SPIRV_VARIABLE_NAME)
        set(SPIRV_HEADER_FILE "${SHADER_DIR}/${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}_spv.h")
        add_custom_command(
            OUTPUT
                ${SPIRV_HEADER_FILE}
            COMMAND
                ${GLSLANGVALIDATOR} -V -DBLOCK_WIDTH=${ASTC_WIDTH} -DBLOCK_HEIGHT=${ASTC_HEIGHT} ${QUIET_FLAG} ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE}
            # DEPENDS rather than MAIN_DEPENDENCY: a source file may be the
            # main dependency of at most one custom command, and this loop
            # creates one command per block size for the same file.
            DEPENDS
                ${SOURCE_FILE}
        )
        list(APPEND SHADER_HEADERS ${SPIRV_HEADER_FILE})

        # GLSL Specialization
        # Disabled as there was no noticeable performance uplift specializing the shaders for OGL

        # set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT}.h)
        # set(SHADER_DEFINES "#define BLOCK_WIDTH ${ASTC_WIDTH}" "#define BLOCK_HEIGHT ${ASTC_HEIGHT}")
        # set(DEFINES_LINE_NUMBER 14)
        # string(TOUPPER ${SHADER_NAME}_${ASTC_WIDTH}x${ASTC_HEIGHT} GLSL_VARIABLE_NAME)
        # add_custom_command(
        #     OUTPUT
        #         ${SOURCE_HEADER_FILE}
        #     COMMAND
        #         ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE} "${SHADER_DEFINES}" ${DEFINES_LINE_NUMBER} ${GLSL_VARIABLE_NAME}
        #     MAIN_DEPENDENCY
        #         ${SOURCE_FILE}
        #     DEPENDS
        #         ${INPUT_FILE}
        #         ${SOURCE_FILE}
        # )
        # set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE})
    endforeach()
endmacro()
148
98foreach(FILENAME IN ITEMS ${SHADER_FILES}) 149foreach(FILENAME IN ITEMS ${SHADER_FILES})
99 string(REPLACE "." "_" SHADER_NAME ${FILENAME}) 150 string(REPLACE "." "_" SHADER_NAME ${FILENAME})
100 set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) 151 set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME})
152
153 if (${FILENAME} MATCHES "astc_decoder.comp")
154 ASTC_GEN()
155 endif()
156
101 # Skip generating source headers on Vulkan exclusive files 157 # Skip generating source headers on Vulkan exclusive files
102 if (NOT ${FILENAME} MATCHES "vulkan.*") 158 if (NOT ${FILENAME} MATCHES "vulkan.*")
103 set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) 159 set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h)
@@ -151,6 +207,7 @@ endforeach()
151 207
152set(SHADER_SOURCES ${SHADER_FILES}) 208set(SHADER_SOURCES ${SHADER_FILES})
153list(APPEND SHADER_SOURCES ${GLSL_INCLUDES}) 209list(APPEND SHADER_SOURCES ${GLSL_INCLUDES})
210list(APPEND SHADER_SOURCES ${ASTC_INCLUDES})
154 211
155add_custom_target(host_shaders 212add_custom_target(host_shaders
156 DEPENDS 213 DEPENDS
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index a33c916ac..b4bb8299f 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -24,7 +24,9 @@
24layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; 24layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
25 25
26BEGIN_PUSH_CONSTANTS 26BEGIN_PUSH_CONSTANTS
27#ifndef BLOCK_WIDTH
27UNIFORM(1) uvec2 block_dims; 28UNIFORM(1) uvec2 block_dims;
29#endif
28UNIFORM(2) uint layer_stride; 30UNIFORM(2) uint layer_stride;
29UNIFORM(3) uint block_size; 31UNIFORM(3) uint block_size;
30UNIFORM(4) uint x_shift; 32UNIFORM(4) uint x_shift;
@@ -75,7 +77,15 @@ int color_bitsread = 0;
75// At most will require BLOCK_WIDTH x BLOCK_HEIGHT x 2 in dual plane mode 77// At most will require BLOCK_WIDTH x BLOCK_HEIGHT x 2 in dual plane mode
76// So the maximum would be 144 (12 x 12) elements, x 2 for two planes 78// So the maximum would be 144 (12 x 12) elements, x 2 for two planes
77#define DIVCEIL(number, divisor) (number + divisor - 1) / divisor 79#define DIVCEIL(number, divisor) (number + divisor - 1) / divisor
80
81#ifndef BLOCK_WIDTH
82#define BLOCK_WIDTH block_dims.x
83#define BLOCK_HEIGHT block_dims.y
78#define ARRAY_NUM_ELEMENTS 144 84#define ARRAY_NUM_ELEMENTS 144
85#else
86#define ARRAY_NUM_ELEMENTS BLOCK_WIDTH * BLOCK_HEIGHT
87#endif
88
79#define VECTOR_ARRAY_SIZE DIVCEIL(ARRAY_NUM_ELEMENTS * 2, 4) 89#define VECTOR_ARRAY_SIZE DIVCEIL(ARRAY_NUM_ELEMENTS * 2, 4)
80uvec4 result_vector[VECTOR_ARRAY_SIZE]; 90uvec4 result_vector[VECTOR_ARRAY_SIZE];
81 91
@@ -265,7 +275,7 @@ uint Hash52(uint p) {
265} 275}
266 276
267uint Select2DPartition(uint seed, uint x, uint y, uint partition_count) { 277uint Select2DPartition(uint seed, uint x, uint y, uint partition_count) {
268 if ((block_dims.y * block_dims.x) < 32) { 278 if ((BLOCK_WIDTH * BLOCK_HEIGHT) < 32) {
269 x <<= 1; 279 x <<= 1;
270 y <<= 1; 280 y <<= 1;
271 } 281 }
@@ -878,8 +888,8 @@ uint UnquantizeTexelWeight(EncodingData val) {
878uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE]; 888uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE];
879 889
880void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { 890void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
881 const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1)); 891 const uint Ds = uint((BLOCK_WIDTH * 0.5f + 1024) / (BLOCK_WIDTH - 1));
882 const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1)); 892 const uint Dt = uint((BLOCK_HEIGHT * 0.5f + 1024) / (BLOCK_HEIGHT - 1));
883 const uint num_planes = is_dual_plane ? 2 : 1; 893 const uint num_planes = is_dual_plane ? 2 : 1;
884 const uint area = size.x * size.y; 894 const uint area = size.x * size.y;
885 const uint loop_count = min(result_index, area * num_planes); 895 const uint loop_count = min(result_index, area * num_planes);
@@ -890,8 +900,8 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
890 UnquantizeTexelWeight(GetEncodingFromVector(itr)); 900 UnquantizeTexelWeight(GetEncodingFromVector(itr));
891 } 901 }
892 for (uint plane = 0; plane < num_planes; ++plane) { 902 for (uint plane = 0; plane < num_planes; ++plane) {
893 for (uint t = 0; t < block_dims.y; t++) { 903 for (uint t = 0; t < BLOCK_HEIGHT; t++) {
894 for (uint s = 0; s < block_dims.x; s++) { 904 for (uint s = 0; s < BLOCK_WIDTH; s++) {
895 const uint cs = Ds * s; 905 const uint cs = Ds * s;
896 const uint ct = Dt * t; 906 const uint ct = Dt * t;
897 const uint gs = (cs * (size.x - 1) + 32) >> 6; 907 const uint gs = (cs * (size.x - 1) + 32) >> 6;
@@ -934,7 +944,7 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
934 VectorIndicesFromBase(offset_base); 944 VectorIndicesFromBase(offset_base);
935 p.w = result_vector[array_index][vector_index]; 945 p.w = result_vector[array_index][vector_index];
936 } 946 }
937 const uint offset = (t * block_dims.x + s) + ARRAY_NUM_ELEMENTS * plane; 947 const uint offset = (t * BLOCK_WIDTH + s) + ARRAY_NUM_ELEMENTS * plane;
938 const uint array_index = offset / 4; 948 const uint array_index = offset / 4;
939 const uint vector_index = offset % 4; 949 const uint vector_index = offset % 4;
940 unquantized_texel_weights[array_index][vector_index] = (uint(dot(p, w)) + 8) >> 4; 950 unquantized_texel_weights[array_index][vector_index] = (uint(dot(p, w)) + 8) >> 4;
@@ -976,8 +986,8 @@ int FindLayout(uint mode) {
976 986
977 987
978void FillError(ivec3 coord) { 988void FillError(ivec3 coord) {
979 for (uint j = 0; j < block_dims.y; j++) { 989 for (uint j = 0; j < BLOCK_HEIGHT; j++) {
980 for (uint i = 0; i < block_dims.x; i++) { 990 for (uint i = 0; i < BLOCK_WIDTH; i++) {
981 imageStore(dest_image, coord + ivec3(i, j, 0), vec4(0.0, 0.0, 0.0, 0.0)); 991 imageStore(dest_image, coord + ivec3(i, j, 0), vec4(0.0, 0.0, 0.0, 0.0));
982 } 992 }
983 } 993 }
@@ -993,8 +1003,8 @@ void FillVoidExtentLDR(ivec3 coord) {
993 const float r = float(r_u) / 65535.0f; 1003 const float r = float(r_u) / 65535.0f;
994 const float g = float(g_u) / 65535.0f; 1004 const float g = float(g_u) / 65535.0f;
995 const float b = float(b_u) / 65535.0f; 1005 const float b = float(b_u) / 65535.0f;
996 for (uint j = 0; j < block_dims.y; j++) { 1006 for (uint j = 0; j < BLOCK_HEIGHT; j++) {
997 for (uint i = 0; i < block_dims.x; i++) { 1007 for (uint i = 0; i < BLOCK_WIDTH; i++) {
998 imageStore(dest_image, coord + ivec3(i, j, 0), vec4(r, g, b, a)); 1008 imageStore(dest_image, coord + ivec3(i, j, 0), vec4(r, g, b, a));
999 } 1009 }
1000 } 1010 }
@@ -1089,7 +1099,7 @@ void DecompressBlock(ivec3 coord) {
1089 return; 1099 return;
1090 } 1100 }
1091 const uvec2 size_params = DecodeBlockSize(mode); 1101 const uvec2 size_params = DecodeBlockSize(mode);
1092 if ((size_params.x > block_dims.x) || (size_params.y > block_dims.y)) { 1102 if ((size_params.x > BLOCK_WIDTH) || (size_params.y > BLOCK_HEIGHT)) {
1093 FillError(coord); 1103 FillError(coord);
1094 return; 1104 return;
1095 } 1105 }
@@ -1218,21 +1228,21 @@ void DecompressBlock(ivec3 coord) {
1218 DecodeIntegerSequence(max_weight, GetNumWeightValues(size_params, dual_plane)); 1228 DecodeIntegerSequence(max_weight, GetNumWeightValues(size_params, dual_plane));
1219 1229
1220 UnquantizeTexelWeights(size_params, dual_plane); 1230 UnquantizeTexelWeights(size_params, dual_plane);
1221 for (uint j = 0; j < block_dims.y; j++) { 1231 for (uint j = 0; j < BLOCK_HEIGHT; j++) {
1222 for (uint i = 0; i < block_dims.x; i++) { 1232 for (uint i = 0; i < BLOCK_WIDTH; i++) {
1223 uint local_partition = 0; 1233 uint local_partition = 0;
1224 if (num_partitions > 1) { 1234 if (num_partitions > 1) {
1225 local_partition = Select2DPartition(partition_index, i, j, num_partitions); 1235 local_partition = Select2DPartition(partition_index, i, j, num_partitions);
1226 } 1236 }
1227 const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]); 1237 const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]);
1228 const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]); 1238 const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]);
1229 const uint weight_offset = (j * block_dims.x + i); 1239 const uint weight_offset = (j * BLOCK_WIDTH + i);
1230 const uint array_index = weight_offset / 4; 1240 const uint array_index = weight_offset / 4;
1231 const uint vector_index = weight_offset % 4; 1241 const uint vector_index = weight_offset % 4;
1232 const uint primary_weight = unquantized_texel_weights[array_index][vector_index]; 1242 const uint primary_weight = unquantized_texel_weights[array_index][vector_index];
1233 uvec4 weight_vec = uvec4(primary_weight); 1243 uvec4 weight_vec = uvec4(primary_weight);
1234 if (dual_plane) { 1244 if (dual_plane) {
1235 const uint secondary_weight_offset = (j * block_dims.x + i) + ARRAY_NUM_ELEMENTS; 1245 const uint secondary_weight_offset = (j * BLOCK_WIDTH + i) + ARRAY_NUM_ELEMENTS;
1236 const uint secondary_array_index = secondary_weight_offset / 4; 1246 const uint secondary_array_index = secondary_weight_offset / 4;
1237 const uint secondary_vector_index = secondary_weight_offset % 4; 1247 const uint secondary_vector_index = secondary_weight_offset % 4;
1238 const uint secondary_weight = 1248 const uint secondary_weight =
@@ -1270,7 +1280,7 @@ void main() {
1270 offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; 1280 offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift;
1271 offset += swizzle; 1281 offset += swizzle;
1272 1282
1273 const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1)); 1283 const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(BLOCK_WIDTH, BLOCK_HEIGHT, 1));
1274 if (any(greaterThanEqual(coord, imageSize(dest_image)))) { 1284 if (any(greaterThanEqual(coord, imageSize(dest_image)))) {
1275 return; 1285 return;
1276 } 1286 }
diff --git a/src/video_core/host_shaders/astc_decoder_spv_includes.h b/src/video_core/host_shaders/astc_decoder_spv_includes.h
new file mode 100644
index 000000000..44ee50c5f
--- /dev/null
+++ b/src/video_core/host_shaders/astc_decoder_spv_includes.h
@@ -0,0 +1,20 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include "video_core/host_shaders/astc_decoder_comp_10x10_spv.h"
7#include "video_core/host_shaders/astc_decoder_comp_10x5_spv.h"
8#include "video_core/host_shaders/astc_decoder_comp_10x6_spv.h"
9#include "video_core/host_shaders/astc_decoder_comp_10x8_spv.h"
10#include "video_core/host_shaders/astc_decoder_comp_12x10_spv.h"
11#include "video_core/host_shaders/astc_decoder_comp_12x12_spv.h"
12#include "video_core/host_shaders/astc_decoder_comp_4x4_spv.h"
13#include "video_core/host_shaders/astc_decoder_comp_5x4_spv.h"
14#include "video_core/host_shaders/astc_decoder_comp_5x5_spv.h"
15#include "video_core/host_shaders/astc_decoder_comp_6x5_spv.h"
16#include "video_core/host_shaders/astc_decoder_comp_6x6_spv.h"
17#include "video_core/host_shaders/astc_decoder_comp_8x5_spv.h"
18#include "video_core/host_shaders/astc_decoder_comp_8x6_spv.h"
19#include "video_core/host_shaders/astc_decoder_comp_8x8_spv.h"
20#include "video_core/host_shaders/astc_decoder_comp_spv.h"