diff options
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 46 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/util_shaders.cpp | 31 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.cpp | 107 | ||||
| -rw-r--r-- | src/video_core/textures/astc.cpp | 10 | ||||
| -rw-r--r-- | src/video_core/textures/astc.h | 11 |
5 files changed, 50 insertions, 155 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index eaba1b103..c37f15bfd 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp | |||
| @@ -11,12 +11,8 @@ | |||
| 11 | #define UNIFORM(n) | 11 | #define UNIFORM(n) |
| 12 | #define BINDING_INPUT_BUFFER 0 | 12 | #define BINDING_INPUT_BUFFER 0 |
| 13 | #define BINDING_ENC_BUFFER 1 | 13 | #define BINDING_ENC_BUFFER 1 |
| 14 | #define BINDING_6_TO_8_BUFFER 2 | 14 | #define BINDING_SWIZZLE_BUFFER 2 |
| 15 | #define BINDING_7_TO_8_BUFFER 3 | 15 | #define BINDING_OUTPUT_IMAGE 3 |
| 16 | #define BINDING_8_TO_8_BUFFER 4 | ||
| 17 | #define BINDING_BYTE_TO_16_BUFFER 5 | ||
| 18 | #define BINDING_SWIZZLE_BUFFER 6 | ||
| 19 | #define BINDING_OUTPUT_IMAGE 7 | ||
| 20 | 16 | ||
| 21 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv | 17 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv |
| 22 | 18 | ||
| @@ -26,10 +22,6 @@ | |||
| 26 | #define BINDING_SWIZZLE_BUFFER 0 | 22 | #define BINDING_SWIZZLE_BUFFER 0 |
| 27 | #define BINDING_INPUT_BUFFER 1 | 23 | #define BINDING_INPUT_BUFFER 1 |
| 28 | #define BINDING_ENC_BUFFER 2 | 24 | #define BINDING_ENC_BUFFER 2 |
| 29 | #define BINDING_6_TO_8_BUFFER 3 | ||
| 30 | #define BINDING_7_TO_8_BUFFER 4 | ||
| 31 | #define BINDING_8_TO_8_BUFFER 5 | ||
| 32 | #define BINDING_BYTE_TO_16_BUFFER 6 | ||
| 33 | #define BINDING_OUTPUT_IMAGE 0 | 25 | #define BINDING_OUTPUT_IMAGE 0 |
| 34 | 26 | ||
| 35 | #endif | 27 | #endif |
| @@ -76,19 +68,6 @@ layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { | |||
| 76 | layout(binding = BINDING_ENC_BUFFER, std430) readonly buffer EncodingsValues { | 68 | layout(binding = BINDING_ENC_BUFFER, std430) readonly buffer EncodingsValues { |
| 77 | EncodingData encoding_values[]; | 69 | EncodingData encoding_values[]; |
| 78 | }; | 70 | }; |
| 79 | // ASTC Precompiled tables | ||
| 80 | layout(binding = BINDING_6_TO_8_BUFFER, std430) readonly buffer REPLICATE_6_BIT_TO_8 { | ||
| 81 | uint REPLICATE_6_BIT_TO_8_TABLE[]; | ||
| 82 | }; | ||
| 83 | layout(binding = BINDING_7_TO_8_BUFFER, std430) readonly buffer REPLICATE_7_BIT_TO_8 { | ||
| 84 | uint REPLICATE_7_BIT_TO_8_TABLE[]; | ||
| 85 | }; | ||
| 86 | layout(binding = BINDING_8_TO_8_BUFFER, std430) readonly buffer REPLICATE_8_BIT_TO_8 { | ||
| 87 | uint REPLICATE_8_BIT_TO_8_TABLE[]; | ||
| 88 | }; | ||
| 89 | layout(binding = BINDING_BYTE_TO_16_BUFFER, std430) readonly buffer REPLICATE_BYTE_TO_16 { | ||
| 90 | uint REPLICATE_BYTE_TO_16_TABLE[]; | ||
| 91 | }; | ||
| 92 | 71 | ||
| 93 | layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; | 72 | layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; |
| 94 | 73 | ||
| @@ -139,6 +118,19 @@ const uint REPLICATE_4_BIT_TO_6_TABLE[16] = | |||
| 139 | const uint REPLICATE_5_BIT_TO_6_TABLE[32] = | 118 | const uint REPLICATE_5_BIT_TO_6_TABLE[32] = |
| 140 | uint[](0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 33, 35, 37, 39, 41, 43, 45, | 119 | uint[](0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 33, 35, 37, 39, 41, 43, 45, |
| 141 | 47, 49, 51, 53, 55, 57, 59, 61, 63); | 120 | 47, 49, 51, 53, 55, 57, 59, 61, 63); |
| 121 | const uint REPLICATE_6_BIT_TO_8_TABLE[64] = | ||
| 122 | uint[](0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 65, 69, 73, 77, 81, 85, 89, | ||
| 123 | 93, 97, 101, 105, 109, 113, 117, 121, 125, 130, 134, 138, 142, 146, 150, 154, 158, 162, | ||
| 124 | 166, 170, 174, 178, 182, 186, 190, 195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235, | ||
| 125 | 239, 243, 247, 251, 255); | ||
| 126 | const uint REPLICATE_7_BIT_TO_8_TABLE[128] = | ||
| 127 | uint[](0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, | ||
| 128 | 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, | ||
| 129 | 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, | ||
| 130 | 129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149, 151, 153, 155, 157, 159, 161, 163, | ||
| 131 | 165, 167, 169, 171, 173, 175, 177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199, | ||
| 132 | 201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, | ||
| 133 | 237, 239, 241, 243, 245, 247, 249, 251, 253, 255); | ||
| 142 | 134 | ||
| 143 | // Input ASTC texture globals | 135 | // Input ASTC texture globals |
| 144 | uint current_index = 0; | 136 | uint current_index = 0; |
| @@ -207,8 +199,7 @@ uint Replicate(uint val, uint num_bits, uint to_bit) { | |||
| 207 | } | 199 | } |
| 208 | 200 | ||
| 209 | uvec4 ReplicateByteTo16(uvec4 value) { | 201 | uvec4 ReplicateByteTo16(uvec4 value) { |
| 210 | return uvec4(REPLICATE_BYTE_TO_16_TABLE[value.x], REPLICATE_BYTE_TO_16_TABLE[value.y], | 202 | return value * 0x101; |
| 211 | REPLICATE_BYTE_TO_16_TABLE[value.z], REPLICATE_BYTE_TO_16_TABLE[value.w]); | ||
| 212 | } | 203 | } |
| 213 | 204 | ||
| 214 | uint ReplicateBitTo7(uint value) { | 205 | uint ReplicateBitTo7(uint value) { |
| @@ -236,7 +227,7 @@ uint FastReplicateTo8(uint value, uint num_bits) { | |||
| 236 | case 7: | 227 | case 7: |
| 237 | return REPLICATE_7_BIT_TO_8_TABLE[value]; | 228 | return REPLICATE_7_BIT_TO_8_TABLE[value]; |
| 238 | case 8: | 229 | case 8: |
| 239 | return REPLICATE_8_BIT_TO_8_TABLE[value]; | 230 | return value; |
| 240 | } | 231 | } |
| 241 | return Replicate(value, num_bits, 8); | 232 | return Replicate(value, num_bits, 8); |
| 242 | } | 233 | } |
| @@ -1327,6 +1318,9 @@ void main() { | |||
| 1327 | offset += swizzle; | 1318 | offset += swizzle; |
| 1328 | 1319 | ||
| 1329 | const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1)); | 1320 | const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1)); |
| 1321 | if (any(greaterThanEqual(coord, imageSize(dest_image)))) { | ||
| 1322 | return; | ||
| 1323 | } | ||
| 1330 | uint block_index = | 1324 | uint block_index = |
| 1331 | pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x; | 1325 | pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x; |
| 1332 | 1326 | ||
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 47fddcb6e..abaf1ee6a 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp | |||
| @@ -69,7 +69,8 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) | |||
| 69 | swizzle_table_buffer.Create(); | 69 | swizzle_table_buffer.Create(); |
| 70 | astc_buffer.Create(); | 70 | astc_buffer.Create(); |
| 71 | glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); | 71 | glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); |
| 72 | glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_BUFFER_DATA), &ASTC_BUFFER_DATA, 0); | 72 | glNamedBufferStorage(astc_buffer.handle, sizeof(ASTC_ENCODINGS_VALUES), &ASTC_ENCODINGS_VALUES, |
| 73 | 0); | ||
| 73 | } | 74 | } |
| 74 | 75 | ||
| 75 | UtilShaders::~UtilShaders() = default; | 76 | UtilShaders::~UtilShaders() = default; |
| @@ -79,12 +80,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, | |||
| 79 | static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; | 80 | static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; |
| 80 | static constexpr GLuint BINDING_INPUT_BUFFER = 1; | 81 | static constexpr GLuint BINDING_INPUT_BUFFER = 1; |
| 81 | static constexpr GLuint BINDING_ENC_BUFFER = 2; | 82 | static constexpr GLuint BINDING_ENC_BUFFER = 2; |
| 82 | |||
| 83 | static constexpr GLuint BINDING_6_TO_8_BUFFER = 3; | ||
| 84 | static constexpr GLuint BINDING_7_TO_8_BUFFER = 4; | ||
| 85 | static constexpr GLuint BINDING_8_TO_8_BUFFER = 5; | ||
| 86 | static constexpr GLuint BINDING_BYTE_TO_16_BUFFER = 6; | ||
| 87 | |||
| 88 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | 83 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; |
| 89 | 84 | ||
| 90 | const Extent2D tile_size{ | 85 | const Extent2D tile_size{ |
| @@ -93,21 +88,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, | |||
| 93 | }; | 88 | }; |
| 94 | program_manager.BindHostCompute(astc_decoder_program.handle); | 89 | program_manager.BindHostCompute(astc_decoder_program.handle); |
| 95 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | 90 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); |
| 96 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle, | 91 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); |
| 97 | offsetof(AstcBufferData, encoding_values), | ||
| 98 | sizeof(AstcBufferData::encoding_values)); | ||
| 99 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_6_TO_8_BUFFER, astc_buffer.handle, | ||
| 100 | offsetof(AstcBufferData, replicate_6_to_8), | ||
| 101 | sizeof(AstcBufferData::replicate_6_to_8)); | ||
| 102 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_7_TO_8_BUFFER, astc_buffer.handle, | ||
| 103 | offsetof(AstcBufferData, replicate_7_to_8), | ||
| 104 | sizeof(AstcBufferData::replicate_7_to_8)); | ||
| 105 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_8_TO_8_BUFFER, astc_buffer.handle, | ||
| 106 | offsetof(AstcBufferData, replicate_8_to_8), | ||
| 107 | sizeof(AstcBufferData::replicate_8_to_8)); | ||
| 108 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_BYTE_TO_16_BUFFER, astc_buffer.handle, | ||
| 109 | offsetof(AstcBufferData, replicate_byte_to_16), | ||
| 110 | sizeof(AstcBufferData::replicate_byte_to_16)); | ||
| 111 | 92 | ||
| 112 | glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); | 93 | glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); |
| 113 | glUniform2ui(1, tile_size.width, tile_size.height); | 94 | glUniform2ui(1, tile_size.width, tile_size.height); |
| @@ -137,6 +118,12 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, | |||
| 137 | 118 | ||
| 138 | glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); | 119 | glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); |
| 139 | } | 120 | } |
| 121 | // Precautionary barrier to ensure the compute shader is done decoding prior to texture access. | ||
| 122 | // GL_TEXTURE_FETCH_BARRIER_BIT and GL_SHADER_IMAGE_ACCESS_BARRIER_BIT are used in a separate | ||
| 123 | // glMemoryBarrier call by the texture cache runtime | ||
| 124 | glMemoryBarrier(GL_UNIFORM_BARRIER_BIT | GL_COMMAND_BARRIER_BIT | GL_PIXEL_BUFFER_BARRIER_BIT | | ||
| 125 | GL_TEXTURE_UPDATE_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT | | ||
| 126 | GL_SHADER_STORAGE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT); | ||
| 140 | program_manager.RestoreGuestCompute(); | 127 | program_manager.RestoreGuestCompute(); |
| 141 | } | 128 | } |
| 142 | 129 | ||
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index e11406e58..205cd3b05 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -30,19 +30,16 @@ | |||
| 30 | namespace Vulkan { | 30 | namespace Vulkan { |
| 31 | 31 | ||
| 32 | using Tegra::Texture::SWIZZLE_TABLE; | 32 | using Tegra::Texture::SWIZZLE_TABLE; |
| 33 | using Tegra::Texture::ASTC::EncodingsValues; | 33 | using Tegra::Texture::ASTC::ASTC_ENCODINGS_VALUES; |
| 34 | using namespace Tegra::Texture::ASTC; | 34 | using namespace Tegra::Texture::ASTC; |
| 35 | 35 | ||
| 36 | namespace { | 36 | namespace { |
| 37 | 37 | ||
| 38 | constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0; | 38 | constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0; |
| 39 | constexpr u32 ASTC_BINDING_ENC_BUFFER = 1; | 39 | constexpr u32 ASTC_BINDING_ENC_BUFFER = 1; |
| 40 | constexpr u32 ASTC_BINDING_6_TO_8_BUFFER = 2; | 40 | constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2; |
| 41 | constexpr u32 ASTC_BINDING_7_TO_8_BUFFER = 3; | 41 | constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3; |
| 42 | constexpr u32 ASTC_BINDING_8_TO_8_BUFFER = 4; | 42 | constexpr size_t ASTC_NUM_BINDINGS = 4; |
| 43 | constexpr u32 ASTC_BINDING_BYTE_TO_16_BUFFER = 5; | ||
| 44 | constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 6; | ||
| 45 | constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 7; | ||
| 46 | 43 | ||
| 47 | VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { | 44 | VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { |
| 48 | return { | 45 | return { |
| @@ -71,7 +68,7 @@ std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBinding | |||
| 71 | }}; | 68 | }}; |
| 72 | } | 69 | } |
| 73 | 70 | ||
| 74 | std::array<VkDescriptorSetLayoutBinding, 8> BuildASTCDescriptorSetBindings() { | 71 | std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> BuildASTCDescriptorSetBindings() { |
| 75 | return {{ | 72 | return {{ |
| 76 | { | 73 | { |
| 77 | .binding = ASTC_BINDING_INPUT_BUFFER, | 74 | .binding = ASTC_BINDING_INPUT_BUFFER, |
| @@ -88,34 +85,6 @@ std::array<VkDescriptorSetLayoutBinding, 8> BuildASTCDescriptorSetBindings() { | |||
| 88 | .pImmutableSamplers = nullptr, | 85 | .pImmutableSamplers = nullptr, |
| 89 | }, | 86 | }, |
| 90 | { | 87 | { |
| 91 | .binding = ASTC_BINDING_6_TO_8_BUFFER, | ||
| 92 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 93 | .descriptorCount = 1, | ||
| 94 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 95 | .pImmutableSamplers = nullptr, | ||
| 96 | }, | ||
| 97 | { | ||
| 98 | .binding = ASTC_BINDING_7_TO_8_BUFFER, | ||
| 99 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 100 | .descriptorCount = 1, | ||
| 101 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 102 | .pImmutableSamplers = nullptr, | ||
| 103 | }, | ||
| 104 | { | ||
| 105 | .binding = ASTC_BINDING_8_TO_8_BUFFER, | ||
| 106 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 107 | .descriptorCount = 1, | ||
| 108 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 109 | .pImmutableSamplers = nullptr, | ||
| 110 | }, | ||
| 111 | { | ||
| 112 | .binding = ASTC_BINDING_BYTE_TO_16_BUFFER, | ||
| 113 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 114 | .descriptorCount = 1, | ||
| 115 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 116 | .pImmutableSamplers = nullptr, | ||
| 117 | }, | ||
| 118 | { | ||
| 119 | .binding = ASTC_BINDING_SWIZZLE_BUFFER, | 88 | .binding = ASTC_BINDING_SWIZZLE_BUFFER, |
| 120 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 89 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| 121 | .descriptorCount = 1, | 90 | .descriptorCount = 1, |
| @@ -143,7 +112,8 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { | |||
| 143 | }; | 112 | }; |
| 144 | } | 113 | } |
| 145 | 114 | ||
| 146 | std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateTemplateEntry() { | 115 | std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS> |
| 116 | BuildASTCPassDescriptorUpdateTemplateEntry() { | ||
| 147 | return {{ | 117 | return {{ |
| 148 | { | 118 | { |
| 149 | .dstBinding = ASTC_BINDING_INPUT_BUFFER, | 119 | .dstBinding = ASTC_BINDING_INPUT_BUFFER, |
| @@ -162,38 +132,6 @@ std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateT | |||
| 162 | .stride = sizeof(DescriptorUpdateEntry), | 132 | .stride = sizeof(DescriptorUpdateEntry), |
| 163 | }, | 133 | }, |
| 164 | { | 134 | { |
| 165 | .dstBinding = ASTC_BINDING_6_TO_8_BUFFER, | ||
| 166 | .dstArrayElement = 0, | ||
| 167 | .descriptorCount = 1, | ||
| 168 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 169 | .offset = ASTC_BINDING_6_TO_8_BUFFER * sizeof(DescriptorUpdateEntry), | ||
| 170 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 171 | }, | ||
| 172 | { | ||
| 173 | .dstBinding = ASTC_BINDING_7_TO_8_BUFFER, | ||
| 174 | .dstArrayElement = 0, | ||
| 175 | .descriptorCount = 1, | ||
| 176 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 177 | .offset = ASTC_BINDING_7_TO_8_BUFFER * sizeof(DescriptorUpdateEntry), | ||
| 178 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 179 | }, | ||
| 180 | { | ||
| 181 | .dstBinding = ASTC_BINDING_8_TO_8_BUFFER, | ||
| 182 | .dstArrayElement = 0, | ||
| 183 | .descriptorCount = 1, | ||
| 184 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 185 | .offset = ASTC_BINDING_8_TO_8_BUFFER * sizeof(DescriptorUpdateEntry), | ||
| 186 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 187 | }, | ||
| 188 | { | ||
| 189 | .dstBinding = ASTC_BINDING_BYTE_TO_16_BUFFER, | ||
| 190 | .dstArrayElement = 0, | ||
| 191 | .descriptorCount = 1, | ||
| 192 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 193 | .offset = ASTC_BINDING_BYTE_TO_16_BUFFER * sizeof(DescriptorUpdateEntry), | ||
| 194 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 195 | }, | ||
| 196 | { | ||
| 197 | .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER, | 135 | .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER, |
| 198 | .dstArrayElement = 0, | 136 | .dstArrayElement = 0, |
| 199 | .descriptorCount = 1, | 137 | .descriptorCount = 1, |
| @@ -222,15 +160,6 @@ struct AstcPushConstants { | |||
| 222 | u32 block_height_mask; | 160 | u32 block_height_mask; |
| 223 | }; | 161 | }; |
| 224 | 162 | ||
| 225 | struct AstcBufferData { | ||
| 226 | decltype(SWIZZLE_TABLE) swizzle_table_buffer = SWIZZLE_TABLE; | ||
| 227 | decltype(EncodingsValues) encoding_values = EncodingsValues; | ||
| 228 | decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE; | ||
| 229 | decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE; | ||
| 230 | decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE; | ||
| 231 | decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE; | ||
| 232 | } constexpr ASTC_BUFFER_DATA; | ||
| 233 | |||
| 234 | } // Anonymous namespace | 163 | } // Anonymous namespace |
| 235 | 164 | ||
| 236 | VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, | 165 | VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, |
| @@ -423,7 +352,7 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, | |||
| 423 | ASTCDecoderPass::~ASTCDecoderPass() = default; | 352 | ASTCDecoderPass::~ASTCDecoderPass() = default; |
| 424 | 353 | ||
| 425 | void ASTCDecoderPass::MakeDataBuffer() { | 354 | void ASTCDecoderPass::MakeDataBuffer() { |
| 426 | constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_BUFFER_DATA) + sizeof(SWIZZLE_TABLE); | 355 | constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_ENCODINGS_VALUES) + sizeof(SWIZZLE_TABLE); |
| 427 | data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | 356 | data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ |
| 428 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 357 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 429 | .pNext = nullptr, | 358 | .pNext = nullptr, |
| @@ -437,9 +366,10 @@ void ASTCDecoderPass::MakeDataBuffer() { | |||
| 437 | data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload); | 366 | data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload); |
| 438 | 367 | ||
| 439 | const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload); | 368 | const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload); |
| 440 | std::memcpy(staging_ref.mapped_span.data(), &ASTC_BUFFER_DATA, sizeof(ASTC_BUFFER_DATA)); | 369 | std::memcpy(staging_ref.mapped_span.data(), &ASTC_ENCODINGS_VALUES, |
| 370 | sizeof(ASTC_ENCODINGS_VALUES)); | ||
| 441 | // Tack on the swizzle table at the end of the buffer | 371 | // Tack on the swizzle table at the end of the buffer |
| 442 | std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_BUFFER_DATA), &SWIZZLE_TABLE, | 372 | std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_ENCODINGS_VALUES), &SWIZZLE_TABLE, |
| 443 | sizeof(SWIZZLE_TABLE)); | 373 | sizeof(SWIZZLE_TABLE)); |
| 444 | 374 | ||
| 445 | scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer, | 375 | scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer, |
| @@ -509,18 +439,8 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | |||
| 509 | update_descriptor_queue.Acquire(); | 439 | update_descriptor_queue.Acquire(); |
| 510 | update_descriptor_queue.AddBuffer(map.buffer, input_offset, | 440 | update_descriptor_queue.AddBuffer(map.buffer, input_offset, |
| 511 | image.guest_size_bytes - swizzle.buffer_offset); | 441 | image.guest_size_bytes - swizzle.buffer_offset); |
| 512 | update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, encoding_values), | 442 | update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(ASTC_ENCODINGS_VALUES)); |
| 513 | sizeof(AstcBufferData::encoding_values)); | 443 | update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES), |
| 514 | update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_6_to_8), | ||
| 515 | sizeof(AstcBufferData::replicate_6_to_8)); | ||
| 516 | update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_7_to_8), | ||
| 517 | sizeof(AstcBufferData::replicate_7_to_8)); | ||
| 518 | update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_8_to_8), | ||
| 519 | sizeof(AstcBufferData::replicate_8_to_8)); | ||
| 520 | update_descriptor_queue.AddBuffer(*data_buffer, | ||
| 521 | offsetof(AstcBufferData, replicate_byte_to_16), | ||
| 522 | sizeof(AstcBufferData::replicate_byte_to_16)); | ||
| 523 | update_descriptor_queue.AddBuffer(*data_buffer, sizeof(AstcBufferData), | ||
| 524 | sizeof(SWIZZLE_TABLE)); | 444 | sizeof(SWIZZLE_TABLE)); |
| 525 | update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); | 445 | update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); |
| 526 | 446 | ||
| @@ -569,6 +489,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | |||
| 569 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | 489 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, |
| 570 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier); | 490 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier); |
| 571 | }); | 491 | }); |
| 492 | scheduler.Finish(); | ||
| 572 | } | 493 | } |
| 573 | 494 | ||
| 574 | } // namespace Vulkan | 495 | } // namespace Vulkan |
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 9b2177ebd..7b756ba41 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp | |||
| @@ -269,7 +269,7 @@ static void DecodeQuintBlock(InputBitStream& bits, IntegerEncodedVector& result, | |||
| 269 | static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange, | 269 | static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange, |
| 270 | u32 nValues) { | 270 | u32 nValues) { |
| 271 | // Determine encoding parameters | 271 | // Determine encoding parameters |
| 272 | IntegerEncodedValue val = EncodingsValues[maxRange]; | 272 | IntegerEncodedValue val = ASTC_ENCODINGS_VALUES[maxRange]; |
| 273 | 273 | ||
| 274 | // Start decoding | 274 | // Start decoding |
| 275 | u32 nValsDecoded = 0; | 275 | u32 nValsDecoded = 0; |
| @@ -310,7 +310,7 @@ struct TexelWeightParams { | |||
| 310 | nIdxs *= 2; | 310 | nIdxs *= 2; |
| 311 | } | 311 | } |
| 312 | 312 | ||
| 313 | return EncodingsValues[m_MaxWeight].GetBitLength(nIdxs); | 313 | return ASTC_ENCODINGS_VALUES[m_MaxWeight].GetBitLength(nIdxs); |
| 314 | } | 314 | } |
| 315 | 315 | ||
| 316 | u32 GetNumWeightValues() const { | 316 | u32 GetNumWeightValues() const { |
| @@ -551,6 +551,8 @@ static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) { | |||
| 551 | } | 551 | } |
| 552 | } | 552 | } |
| 553 | } | 553 | } |
| 554 | |||
| 555 | static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>(); | ||
| 554 | static constexpr u32 ReplicateByteTo16(std::size_t value) { | 556 | static constexpr u32 ReplicateByteTo16(std::size_t value) { |
| 555 | return REPLICATE_BYTE_TO_16_TABLE[value]; | 557 | return REPLICATE_BYTE_TO_16_TABLE[value]; |
| 556 | } | 558 | } |
| @@ -753,12 +755,12 @@ static void DecodeColorValues(u32* out, std::span<u8> data, const u32* modes, co | |||
| 753 | // figure out the max value for each of them... | 755 | // figure out the max value for each of them... |
| 754 | u32 range = 256; | 756 | u32 range = 256; |
| 755 | while (--range > 0) { | 757 | while (--range > 0) { |
| 756 | IntegerEncodedValue val = EncodingsValues[range]; | 758 | IntegerEncodedValue val = ASTC_ENCODINGS_VALUES[range]; |
| 757 | u32 bitLength = val.GetBitLength(nValues); | 759 | u32 bitLength = val.GetBitLength(nValues); |
| 758 | if (bitLength <= nBitsForColorData) { | 760 | if (bitLength <= nBitsForColorData) { |
| 759 | // Find the smallest possible range that matches the given encoding | 761 | // Find the smallest possible range that matches the given encoding |
| 760 | while (--range > 0) { | 762 | while (--range > 0) { |
| 761 | IntegerEncodedValue newval = EncodingsValues[range]; | 763 | IntegerEncodedValue newval = ASTC_ENCODINGS_VALUES[range]; |
| 762 | if (!newval.MatchesEncoding(val)) { | 764 | if (!newval.MatchesEncoding(val)) { |
| 763 | break; | 765 | break; |
| 764 | } | 766 | } |
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index c1c37dfe7..0229ae122 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h | |||
| @@ -77,7 +77,7 @@ constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() { | |||
| 77 | return encodings; | 77 | return encodings; |
| 78 | } | 78 | } |
| 79 | 79 | ||
| 80 | constexpr std::array<IntegerEncodedValue, 256> EncodingsValues = MakeEncodedValues(); | 80 | constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues(); |
| 81 | 81 | ||
| 82 | // Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] | 82 | // Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] |
| 83 | // is the same as [(num_bits - 1):0] and repeats all the way down. | 83 | // is the same as [(num_bits - 1):0] and repeats all the way down. |
| @@ -116,19 +116,10 @@ constexpr auto MakeReplicateTable() { | |||
| 116 | return table; | 116 | return table; |
| 117 | } | 117 | } |
| 118 | 118 | ||
| 119 | constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>(); | ||
| 120 | constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>(); | 119 | constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>(); |
| 121 | constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>(); | 120 | constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>(); |
| 122 | constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>(); | 121 | constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>(); |
| 123 | 122 | ||
| 124 | struct AstcBufferData { | ||
| 125 | decltype(EncodingsValues) encoding_values = EncodingsValues; | ||
| 126 | decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE; | ||
| 127 | decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE; | ||
| 128 | decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE; | ||
| 129 | decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE; | ||
| 130 | } constexpr ASTC_BUFFER_DATA; | ||
| 131 | |||
| 132 | void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, | 123 | void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, |
| 133 | uint32_t block_width, uint32_t block_height, std::span<uint8_t> output); | 124 | uint32_t block_width, uint32_t block_height, std::span<uint8_t> output); |
| 134 | 125 | ||