diff options
| author | 2021-02-13 16:08:50 -0500 | |
|---|---|---|
| committer | 2021-03-13 12:16:03 -0500 | |
| commit | 2f30c105849c214345e2201f4bd6f9b4b76ab4a1 (patch) | |
| tree | 5e5889a44af4194fcf22d1375bdf9f91b5302dc1 /src | |
| parent | astc_decoder: Fix out of bounds memory access (diff) | |
| download | yuzu-2f30c105849c214345e2201f4bd6f9b4b76ab4a1.tar.gz yuzu-2f30c105849c214345e2201f4bd6f9b4b76ab4a1.tar.xz yuzu-2f30c105849c214345e2201f4bd6f9b4b76ab4a1.zip | |
astc_decoder: Reimplement Layers
Reimplements how layers are decoded in the compute shader: layers are now covered by the Z dimension of each dispatch rather than by a separate dispatch per layer. Fixes multilayer ASTC decoding when using Vulkan.
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 33 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/util_shaders.cpp | 53 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.cpp | 158 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 46 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.h | 13 |
5 files changed, 161 insertions, 142 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 5be716309..b903a2d37 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp | |||
| @@ -39,17 +39,15 @@ layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; | |||
| 39 | BEGIN_PUSH_CONSTANTS | 39 | BEGIN_PUSH_CONSTANTS |
| 40 | UNIFORM(0) uvec2 num_image_blocks; | 40 | UNIFORM(0) uvec2 num_image_blocks; |
| 41 | UNIFORM(1) uvec2 block_dims; | 41 | UNIFORM(1) uvec2 block_dims; |
| 42 | UNIFORM(2) uint layer; | ||
| 43 | |||
| 44 | UNIFORM(3) uvec3 origin; | ||
| 45 | UNIFORM(4) ivec3 destination; | ||
| 46 | UNIFORM(5) uint bytes_per_block_log2; | ||
| 47 | UNIFORM(6) uint layer_stride; | ||
| 48 | UNIFORM(7) uint block_size; | ||
| 49 | UNIFORM(8) uint x_shift; | ||
| 50 | UNIFORM(9) uint block_height; | ||
| 51 | UNIFORM(10) uint block_height_mask; | ||
| 52 | 42 | ||
| 43 | UNIFORM(2) uvec3 origin; | ||
| 44 | UNIFORM(3) ivec3 destination; | ||
| 45 | UNIFORM(4) uint bytes_per_block_log2; | ||
| 46 | UNIFORM(5) uint layer_stride; | ||
| 47 | UNIFORM(6) uint block_size; | ||
| 48 | UNIFORM(7) uint x_shift; | ||
| 49 | UNIFORM(8) uint block_height; | ||
| 50 | UNIFORM(9) uint block_height_mask; | ||
| 53 | END_PUSH_CONSTANTS | 51 | END_PUSH_CONSTANTS |
| 54 | 52 | ||
| 55 | uint current_index = 0; | 53 | uint current_index = 0; |
| @@ -82,7 +80,7 @@ layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { | |||
| 82 | uint swizzle_table[]; | 80 | uint swizzle_table[]; |
| 83 | }; | 81 | }; |
| 84 | 82 | ||
| 85 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { | 83 | layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { |
| 86 | uint astc_data[]; | 84 | uint astc_data[]; |
| 87 | }; | 85 | }; |
| 88 | 86 | ||
| @@ -104,7 +102,7 @@ layout(binding = BINDING_BYTE_TO_16_BUFFER, std430) readonly buffer REPLICATE_BY | |||
| 104 | uint REPLICATE_BYTE_TO_16_TABLE[]; | 102 | uint REPLICATE_BYTE_TO_16_TABLE[]; |
| 105 | }; | 103 | }; |
| 106 | 104 | ||
| 107 | layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2D dest_image; | 105 | layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; |
| 108 | 106 | ||
| 109 | const uint GOB_SIZE_X = 64; | 107 | const uint GOB_SIZE_X = 64; |
| 110 | const uint GOB_SIZE_Y = 8; | 108 | const uint GOB_SIZE_Y = 8; |
| @@ -1086,10 +1084,9 @@ TexelWeightParams DecodeBlockInfo(uint block_index) { | |||
| 1086 | void FillError(ivec3 coord) { | 1084 | void FillError(ivec3 coord) { |
| 1087 | for (uint j = 0; j < block_dims.y; j++) { | 1085 | for (uint j = 0; j < block_dims.y; j++) { |
| 1088 | for (uint i = 0; i < block_dims.x; i++) { | 1086 | for (uint i = 0; i < block_dims.x; i++) { |
| 1089 | imageStore(dest_image, coord.xy + ivec2(i, j), vec4(1.0, 1.0, 0.0, 1.0)); | 1087 | imageStore(dest_image, coord + ivec3(i, j, 0), vec4(1.0, 1.0, 0.0, 1.0)); |
| 1090 | } | 1088 | } |
| 1091 | } | 1089 | } |
| 1092 | return; | ||
| 1093 | } | 1090 | } |
| 1094 | 1091 | ||
| 1095 | void FillVoidExtentLDR(ivec3 coord, uint block_index) { | 1092 | void FillVoidExtentLDR(ivec3 coord, uint block_index) { |
| @@ -1107,7 +1104,7 @@ void FillVoidExtentLDR(ivec3 coord, uint block_index) { | |||
| 1107 | float b = float(b_u) / 65535.0f; | 1104 | float b = float(b_u) / 65535.0f; |
| 1108 | for (uint j = 0; j < block_dims.y; j++) { | 1105 | for (uint j = 0; j < block_dims.y; j++) { |
| 1109 | for (uint i = 0; i < block_dims.x; i++) { | 1106 | for (uint i = 0; i < block_dims.x; i++) { |
| 1110 | imageStore(dest_image, coord.xy + ivec2(i, j), vec4(r, g, b, a)); | 1107 | imageStore(dest_image, coord + ivec3(i, j, 0), vec4(r, g, b, a)); |
| 1111 | } | 1108 | } |
| 1112 | } | 1109 | } |
| 1113 | } | 1110 | } |
| @@ -1264,7 +1261,7 @@ void DecompressBlock(ivec3 coord, uint block_index) { | |||
| 1264 | } | 1261 | } |
| 1265 | vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) >> 6); | 1262 | vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) >> 6); |
| 1266 | p = (Cf / 65535.0); | 1263 | p = (Cf / 65535.0); |
| 1267 | imageStore(dest_image, coord.xy + ivec2(i, j), p.gbar); | 1264 | imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar); |
| 1268 | } | 1265 | } |
| 1269 | } | 1266 | } |
| 1270 | } | 1267 | } |
| @@ -1279,7 +1276,7 @@ void main() { | |||
| 1279 | const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; | 1276 | const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT; |
| 1280 | 1277 | ||
| 1281 | uint offset = 0; | 1278 | uint offset = 0; |
| 1282 | offset += layer * layer_stride; | 1279 | offset += pos.z * layer_stride; |
| 1283 | offset += (block_y >> block_height) * block_size; | 1280 | offset += (block_y >> block_height) * block_size; |
| 1284 | offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; | 1281 | offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT; |
| 1285 | offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; | 1282 | offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; |
| @@ -1287,7 +1284,7 @@ void main() { | |||
| 1287 | 1284 | ||
| 1288 | const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1.0)); | 1285 | const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1.0)); |
| 1289 | uint block_index = | 1286 | uint block_index = |
| 1290 | layer * num_image_blocks.x * num_image_blocks.y + pos.y * num_image_blocks.x + pos.x; | 1287 | pos.z * num_image_blocks.x * num_image_blocks.y + pos.y * num_image_blocks.x + pos.x; |
| 1291 | current_index = 0; | 1288 | current_index = 0; |
| 1292 | bitsread = 0; | 1289 | bitsread = 0; |
| 1293 | for (int i = 0; i < 16; i++) { | 1290 | for (int i = 0; i < 16; i++) { |
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index d0979dab1..85722c54a 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp | |||
| @@ -110,7 +110,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, | |||
| 110 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | 110 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; |
| 111 | static constexpr GLuint LOC_NUM_IMAGE_BLOCKS = 0; | 111 | static constexpr GLuint LOC_NUM_IMAGE_BLOCKS = 0; |
| 112 | static constexpr GLuint LOC_BLOCK_DIMS = 1; | 112 | static constexpr GLuint LOC_BLOCK_DIMS = 1; |
| 113 | static constexpr GLuint LOC_LAYER = 2; | ||
| 114 | 113 | ||
| 115 | const Extent3D tile_size = { | 114 | const Extent3D tile_size = { |
| 116 | VideoCore::Surface::DefaultBlockWidth(image.info.format), | 115 | VideoCore::Surface::DefaultBlockWidth(image.info.format), |
| @@ -130,35 +129,31 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, | |||
| 130 | 129 | ||
| 131 | glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); | 130 | glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); |
| 132 | glUniform2ui(LOC_BLOCK_DIMS, tile_size.width, tile_size.height); | 131 | glUniform2ui(LOC_BLOCK_DIMS, tile_size.width, tile_size.height); |
| 132 | for (const SwizzleParameters& swizzle : swizzles) { | ||
| 133 | glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0, | ||
| 134 | GL_WRITE_ONLY, GL_RGBA8); | ||
| 135 | const size_t input_offset = swizzle.buffer_offset + map.offset; | ||
| 136 | const auto num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); | ||
| 137 | const auto num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); | ||
| 133 | 138 | ||
| 134 | for (u32 layer = 0; layer < image.info.resources.layers; layer++) { | 139 | glUniform2ui(LOC_NUM_IMAGE_BLOCKS, swizzle.num_tiles.width, swizzle.num_tiles.height); |
| 135 | for (const SwizzleParameters& swizzle : swizzles) { | 140 | |
| 136 | glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_FALSE, | 141 | // To unswizzle the ASTC data |
| 137 | layer, GL_WRITE_ONLY, GL_RGBA8); | 142 | const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); |
| 138 | const size_t input_offset = swizzle.buffer_offset + map.offset; | 143 | glUniform3uiv(2, 1, params.origin.data()); |
| 139 | const auto num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); | 144 | glUniform3iv(3, 1, params.destination.data()); |
| 140 | const auto num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); | 145 | glUniform1ui(4, params.bytes_per_block_log2); |
| 141 | 146 | glUniform1ui(5, params.layer_stride); | |
| 142 | glUniform2ui(LOC_NUM_IMAGE_BLOCKS, swizzle.num_tiles.width, swizzle.num_tiles.height); | 147 | glUniform1ui(6, params.block_size); |
| 143 | glUniform1ui(LOC_LAYER, layer); | 148 | glUniform1ui(7, params.x_shift); |
| 144 | 149 | glUniform1ui(8, params.block_height); | |
| 145 | // To unswizzle the ASTC data | 150 | glUniform1ui(9, params.block_height_mask); |
| 146 | const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); | 151 | |
| 147 | glUniform3uiv(3, 1, params.origin.data()); | 152 | // ASTC texture data |
| 148 | glUniform3iv(4, 1, params.destination.data()); | 153 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, |
| 149 | glUniform1ui(5, params.bytes_per_block_log2); | 154 | image.guest_size_bytes - swizzle.buffer_offset); |
| 150 | glUniform1ui(6, params.layer_stride); | 155 | |
| 151 | glUniform1ui(7, params.block_size); | 156 | glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); |
| 152 | glUniform1ui(8, params.x_shift); | ||
| 153 | glUniform1ui(9, params.block_height); | ||
| 154 | glUniform1ui(10, params.block_height_mask); | ||
| 155 | |||
| 156 | // ASTC texture data | ||
| 157 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, | ||
| 158 | input_offset, image.guest_size_bytes - swizzle.buffer_offset); | ||
| 159 | |||
| 160 | glDispatchCompute(num_dispatches_x, num_dispatches_y, 1); | ||
| 161 | } | ||
| 162 | } | 157 | } |
| 163 | program_manager.RestoreGuestCompute(); | 158 | program_manager.RestoreGuestCompute(); |
| 164 | } | 159 | } |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 7587ab1e0..a0050b68f 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -31,6 +31,7 @@ namespace Vulkan { | |||
| 31 | 31 | ||
| 32 | using Tegra::Texture::SWIZZLE_TABLE; | 32 | using Tegra::Texture::SWIZZLE_TABLE; |
| 33 | using Tegra::Texture::ASTC::EncodingsValues; | 33 | using Tegra::Texture::ASTC::EncodingsValues; |
| 34 | using namespace Tegra::Texture::ASTC; | ||
| 34 | 35 | ||
| 35 | namespace { | 36 | namespace { |
| 36 | 37 | ||
| @@ -214,7 +215,6 @@ std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateT | |||
| 214 | struct AstcPushConstants { | 215 | struct AstcPushConstants { |
| 215 | std::array<u32, 2> num_image_blocks; | 216 | std::array<u32, 2> num_image_blocks; |
| 216 | std::array<u32, 2> blocks_dims; | 217 | std::array<u32, 2> blocks_dims; |
| 217 | u32 layer; | ||
| 218 | VideoCommon::Accelerated::BlockLinearSwizzle2DParams params; | 218 | VideoCommon::Accelerated::BlockLinearSwizzle2DParams params; |
| 219 | }; | 219 | }; |
| 220 | 220 | ||
| @@ -226,6 +226,7 @@ struct AstcBufferData { | |||
| 226 | decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE; | 226 | decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE; |
| 227 | decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE; | 227 | decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE; |
| 228 | } constexpr ASTC_BUFFER_DATA; | 228 | } constexpr ASTC_BUFFER_DATA; |
| 229 | |||
| 229 | } // Anonymous namespace | 230 | } // Anonymous namespace |
| 230 | 231 | ||
| 231 | VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, | 232 | VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, |
| @@ -403,7 +404,6 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | |||
| 403 | return {staging.buffer, staging.offset}; | 404 | return {staging.buffer, staging.offset}; |
| 404 | } | 405 | } |
| 405 | 406 | ||
| 406 | using namespace Tegra::Texture::ASTC; | ||
| 407 | ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, | 407 | ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, |
| 408 | VKDescriptorPool& descriptor_pool_, | 408 | VKDescriptorPool& descriptor_pool_, |
| 409 | StagingBufferPool& staging_buffer_pool_, | 409 | StagingBufferPool& staging_buffer_pool_, |
| @@ -464,76 +464,94 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | |||
| 464 | if (!data_buffer) { | 464 | if (!data_buffer) { |
| 465 | MakeDataBuffer(); | 465 | MakeDataBuffer(); |
| 466 | } | 466 | } |
| 467 | const VkImageAspectFlags aspect_mask = image.AspectMask(); | ||
| 468 | const VkImage vk_image = image.Handle(); | ||
| 469 | const bool is_initialized = image.ExchangeInitialization(); | ||
| 470 | scheduler.Record([vk_image, aspect_mask, is_initialized](vk::CommandBuffer cmdbuf) { | ||
| 471 | const VkImageMemoryBarrier image_barrier{ | ||
| 472 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 473 | .pNext = nullptr, | ||
| 474 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||
| 475 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, | ||
| 476 | .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, | ||
| 477 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 478 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 479 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 480 | .image = vk_image, | ||
| 481 | .subresourceRange{ | ||
| 482 | .aspectMask = aspect_mask, | ||
| 483 | .baseMipLevel = 0, | ||
| 484 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 485 | .baseArrayLayer = 0, | ||
| 486 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 487 | }, | ||
| 488 | }; | ||
| 489 | cmdbuf.PipelineBarrier(0, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier); | ||
| 490 | }); | ||
| 467 | const std::array<u32, 2> block_dims{tile_size.width, tile_size.height}; | 491 | const std::array<u32, 2> block_dims{tile_size.width, tile_size.height}; |
| 468 | for (s32 layer = 0; layer < image.info.resources.layers; layer++) { | 492 | for (const VideoCommon::SwizzleParameters& swizzle : swizzles) { |
| 469 | for (const VideoCommon::SwizzleParameters& swizzle : swizzles) { | 493 | const size_t input_offset = swizzle.buffer_offset + map.offset; |
| 470 | const size_t input_offset = swizzle.buffer_offset + map.offset; | 494 | const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); |
| 471 | const auto num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); | 495 | const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); |
| 472 | const auto num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); | 496 | const u32 num_dispatches_z = image.info.resources.layers; |
| 473 | const std::array num_image_blocks{swizzle.num_tiles.width, swizzle.num_tiles.height}; | 497 | const std::array num_image_blocks{swizzle.num_tiles.width, swizzle.num_tiles.height}; |
| 474 | const u32 layer_image_size = | 498 | const u32 layer_image_size = |
| 475 | image.guest_size_bytes - static_cast<u32>(swizzle.buffer_offset); | 499 | image.guest_size_bytes - static_cast<u32>(swizzle.buffer_offset); |
| 476 | 500 | ||
| 477 | update_descriptor_queue.Acquire(); | 501 | update_descriptor_queue.Acquire(); |
| 478 | update_descriptor_queue.AddBuffer(*data_buffer, | 502 | update_descriptor_queue.AddBuffer(*data_buffer, |
| 479 | offsetof(AstcBufferData, swizzle_table_buffer), | 503 | offsetof(AstcBufferData, swizzle_table_buffer), |
| 480 | sizeof(AstcBufferData::swizzle_table_buffer)); | 504 | sizeof(AstcBufferData::swizzle_table_buffer)); |
| 481 | update_descriptor_queue.AddBuffer(map.buffer, input_offset, image.guest_size_bytes); | 505 | update_descriptor_queue.AddBuffer(map.buffer, input_offset, layer_image_size); |
| 482 | update_descriptor_queue.AddBuffer(*data_buffer, | 506 | update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, encoding_values), |
| 483 | offsetof(AstcBufferData, encoding_values), | 507 | sizeof(AstcBufferData::encoding_values)); |
| 484 | sizeof(AstcBufferData::encoding_values)); | 508 | update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_6_to_8), |
| 485 | update_descriptor_queue.AddBuffer(*data_buffer, | 509 | sizeof(AstcBufferData::replicate_6_to_8)); |
| 486 | offsetof(AstcBufferData, replicate_6_to_8), | 510 | update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_7_to_8), |
| 487 | sizeof(AstcBufferData::replicate_6_to_8)); | 511 | sizeof(AstcBufferData::replicate_7_to_8)); |
| 488 | update_descriptor_queue.AddBuffer(*data_buffer, | 512 | update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_8_to_8), |
| 489 | offsetof(AstcBufferData, replicate_7_to_8), | 513 | sizeof(AstcBufferData::replicate_8_to_8)); |
| 490 | sizeof(AstcBufferData::replicate_7_to_8)); | 514 | update_descriptor_queue.AddBuffer(*data_buffer, |
| 491 | update_descriptor_queue.AddBuffer(*data_buffer, | 515 | offsetof(AstcBufferData, replicate_byte_to_16), |
| 492 | offsetof(AstcBufferData, replicate_8_to_8), | 516 | sizeof(AstcBufferData::replicate_byte_to_16)); |
| 493 | sizeof(AstcBufferData::replicate_8_to_8)); | 517 | update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); |
| 494 | update_descriptor_queue.AddBuffer(*data_buffer, | 518 | |
| 495 | offsetof(AstcBufferData, replicate_byte_to_16), | 519 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); |
| 496 | sizeof(AstcBufferData::replicate_byte_to_16)); | 520 | const VkPipelineLayout vk_layout = *layout; |
| 497 | update_descriptor_queue.AddImage(image.StorageImageView()); | 521 | const VkPipeline vk_pipeline = *pipeline; |
| 498 | 522 | // To unswizzle the ASTC data | |
| 499 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | 523 | const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); |
| 500 | // To unswizzle the ASTC data | 524 | scheduler.Record([vk_layout, vk_pipeline, buffer = map.buffer, num_dispatches_x, |
| 501 | const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); | 525 | num_dispatches_y, num_dispatches_z, num_image_blocks, block_dims, params, |
| 502 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = map.buffer, | 526 | set, input_offset](vk::CommandBuffer cmdbuf) { |
| 503 | num_dispatches_x, num_dispatches_y, layer_image_size, | 527 | const AstcPushConstants uniforms{num_image_blocks, block_dims, params}; |
| 504 | num_image_blocks, block_dims, layer, params, set, | 528 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, vk_pipeline); |
| 505 | image = image.Handle(), input_offset, | 529 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {}); |
| 506 | aspect_mask = image.AspectMask()](vk::CommandBuffer cmdbuf) { | 530 | cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); |
| 507 | const AstcPushConstants uniforms{num_image_blocks, block_dims, layer, params}; | 531 | cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z); |
| 508 | 532 | }); | |
| 509 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | ||
| 510 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); | ||
| 511 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); | ||
| 512 | cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, 1); | ||
| 513 | |||
| 514 | const VkImageMemoryBarrier image_barrier{ | ||
| 515 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 516 | .pNext = nullptr, | ||
| 517 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||
| 518 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, | ||
| 519 | .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, | ||
| 520 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 521 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 522 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 523 | .image = image, | ||
| 524 | .subresourceRange{ | ||
| 525 | .aspectMask = aspect_mask, | ||
| 526 | .baseMipLevel = 0, | ||
| 527 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 528 | .baseArrayLayer = 0, | ||
| 529 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 530 | }, | ||
| 531 | }; | ||
| 532 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 533 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, image_barrier); | ||
| 534 | }); | ||
| 535 | } | ||
| 536 | } | 533 | } |
| 534 | scheduler.Record([vk_image, aspect_mask](vk::CommandBuffer cmdbuf) { | ||
| 535 | const VkImageMemoryBarrier image_barrier{ | ||
| 536 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 537 | .pNext = nullptr, | ||
| 538 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||
| 539 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, | ||
| 540 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 541 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 542 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 543 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 544 | .image = vk_image, | ||
| 545 | .subresourceRange{ | ||
| 546 | .aspectMask = aspect_mask, | ||
| 547 | .baseMipLevel = 0, | ||
| 548 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 549 | .baseArrayLayer = 0, | ||
| 550 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 551 | }, | ||
| 552 | }; | ||
| 553 | cmdbuf.PipelineBarrier(0, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier); | ||
| 554 | }); | ||
| 537 | } | 555 | } |
| 538 | 556 | ||
| 539 | } // namespace Vulkan | 557 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index f7f744587..18155e449 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -823,27 +823,31 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ | |||
| 823 | .usage = VK_IMAGE_USAGE_STORAGE_BIT, | 823 | .usage = VK_IMAGE_USAGE_STORAGE_BIT, |
| 824 | }; | 824 | }; |
| 825 | if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { | 825 | if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { |
| 826 | storage_image_view = runtime.device.GetLogical().CreateImageView(VkImageViewCreateInfo{ | 826 | const auto& device = runtime.device.GetLogical(); |
| 827 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | 827 | storage_image_views.reserve(info.resources.levels); |
| 828 | .pNext = &storage_image_view_usage_create_info, | 828 | for (s32 level = 0; level < info.resources.levels; ++level) { |
| 829 | .flags = 0, | 829 | storage_image_views.push_back(device.CreateImageView(VkImageViewCreateInfo{ |
| 830 | .image = *image, | 830 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, |
| 831 | .viewType = VK_IMAGE_VIEW_TYPE_2D, | 831 | .pNext = &storage_image_view_usage_create_info, |
| 832 | .format = VK_FORMAT_A8B8G8R8_UNORM_PACK32, | 832 | .flags = 0, |
| 833 | .components{ | 833 | .image = *image, |
| 834 | .r = VK_COMPONENT_SWIZZLE_IDENTITY, | 834 | .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY, |
| 835 | .g = VK_COMPONENT_SWIZZLE_IDENTITY, | 835 | .format = VK_FORMAT_A8B8G8R8_UNORM_PACK32, |
| 836 | .b = VK_COMPONENT_SWIZZLE_IDENTITY, | 836 | .components{ |
| 837 | .a = VK_COMPONENT_SWIZZLE_IDENTITY, | 837 | .r = VK_COMPONENT_SWIZZLE_IDENTITY, |
| 838 | }, | 838 | .g = VK_COMPONENT_SWIZZLE_IDENTITY, |
| 839 | .subresourceRange{ | 839 | .b = VK_COMPONENT_SWIZZLE_IDENTITY, |
| 840 | .aspectMask = aspect_mask, | 840 | .a = VK_COMPONENT_SWIZZLE_IDENTITY, |
| 841 | .baseMipLevel = 0, | 841 | }, |
| 842 | .levelCount = VK_REMAINING_MIP_LEVELS, | 842 | .subresourceRange{ |
| 843 | .baseArrayLayer = 0, | 843 | .aspectMask = aspect_mask, |
| 844 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | 844 | .baseMipLevel = static_cast<u32>(level), |
| 845 | }, | 845 | .levelCount = 1, |
| 846 | }); | 846 | .baseArrayLayer = 0, |
| 847 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 848 | }, | ||
| 849 | })); | ||
| 850 | } | ||
| 847 | } | 851 | } |
| 848 | } | 852 | } |
| 849 | 853 | ||
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 51705eccb..628785d5e 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -121,12 +121,17 @@ public: | |||
| 121 | return *buffer; | 121 | return *buffer; |
| 122 | } | 122 | } |
| 123 | 123 | ||
| 124 | [[nodiscard]] VkImageCreateFlags AspectMask() const noexcept { | 124 | [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { |
| 125 | return aspect_mask; | 125 | return aspect_mask; |
| 126 | } | 126 | } |
| 127 | 127 | ||
| 128 | [[nodiscard]] VkImageView StorageImageView() const noexcept { | 128 | [[nodiscard]] VkImageView StorageImageView(s32 level) const noexcept { |
| 129 | return *storage_image_view; | 129 | return *storage_image_views[level]; |
| 130 | } | ||
| 131 | |||
| 132 | /// Returns true if the image was already initialized, then marks it as initialized | ||
| 133 | [[nodiscard]] bool ExchangeInitialization() noexcept { | ||
| 134 | return std::exchange(initialized, true); | ||
| 130 | } | 135 | } |
| 131 | 136 | ||
| 132 | private: | 137 | private: |
| @@ -135,7 +140,7 @@ private: | |||
| 135 | vk::Buffer buffer; | 140 | vk::Buffer buffer; |
| 136 | MemoryCommit commit; | 141 | MemoryCommit commit; |
| 137 | vk::ImageView image_view; | 142 | vk::ImageView image_view; |
| 138 | vk::ImageView storage_image_view; | 143 | std::vector<vk::ImageView> storage_image_views; |
| 139 | VkImageAspectFlags aspect_mask = 0; | 144 | VkImageAspectFlags aspect_mask = 0; |
| 140 | bool initialized = false; | 145 | bool initialized = false; |
| 141 | }; | 146 | }; |