diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 43 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.cpp | 298 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.h | 32 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.h | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 45 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.h | 12 | ||||
| -rw-r--r-- | src/video_core/texture_cache/accelerated_swizzle.h | 4 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 23 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.h | 18 |
11 files changed, 426 insertions, 57 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 070190a5c..2ddac2e1d 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp | |||
| @@ -16,7 +16,7 @@ | |||
| 16 | #define BINDING_7_TO_8_BUFFER 4 | 16 | #define BINDING_7_TO_8_BUFFER 4 |
| 17 | #define BINDING_8_TO_8_BUFFER 5 | 17 | #define BINDING_8_TO_8_BUFFER 5 |
| 18 | #define BINDING_BYTE_TO_16_BUFFER 6 | 18 | #define BINDING_BYTE_TO_16_BUFFER 6 |
| 19 | #define BINDING_OUTPUT_IMAGE 3 | 19 | #define BINDING_OUTPUT_IMAGE 7 |
| 20 | 20 | ||
| 21 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv | 21 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv |
| 22 | 22 | ||
| @@ -85,7 +85,26 @@ layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { | |||
| 85 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { | 85 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { |
| 86 | uint astc_data[]; | 86 | uint astc_data[]; |
| 87 | }; | 87 | }; |
| 88 | layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly image2D dest_image; | 88 | |
| 89 | // ASTC Encodings data | ||
| 90 | layout(binding = BINDING_ENC_BUFFER, std430) readonly buffer EncodingsValues { | ||
| 91 | EncodingData encoding_values[]; | ||
| 92 | }; | ||
| 93 | // ASTC Precompiled tables | ||
| 94 | layout(binding = BINDING_6_TO_8_BUFFER, std430) readonly buffer REPLICATE_6_BIT_TO_8 { | ||
| 95 | uint REPLICATE_6_BIT_TO_8_TABLE[]; | ||
| 96 | }; | ||
| 97 | layout(binding = BINDING_7_TO_8_BUFFER, std430) readonly buffer REPLICATE_7_BIT_TO_8 { | ||
| 98 | uint REPLICATE_7_BIT_TO_8_TABLE[]; | ||
| 99 | }; | ||
| 100 | layout(binding = BINDING_8_TO_8_BUFFER, std430) readonly buffer REPLICATE_8_BIT_TO_8 { | ||
| 101 | uint REPLICATE_8_BIT_TO_8_TABLE[]; | ||
| 102 | }; | ||
| 103 | layout(binding = BINDING_BYTE_TO_16_BUFFER, std430) readonly buffer REPLICATE_BYTE_TO_16 { | ||
| 104 | uint REPLICATE_BYTE_TO_16_TABLE[]; | ||
| 105 | }; | ||
| 106 | |||
| 107 | layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2D dest_image; | ||
| 89 | 108 | ||
| 90 | const uint GOB_SIZE_X = 64; | 109 | const uint GOB_SIZE_X = 64; |
| 91 | const uint GOB_SIZE_Y = 8; | 110 | const uint GOB_SIZE_Y = 8; |
| @@ -109,23 +128,6 @@ uint ReadTexel(uint offset) { | |||
| 109 | return bitfieldExtract(astc_data[offset / 4], int((offset * 8) & 24), 8); | 128 | return bitfieldExtract(astc_data[offset / 4], int((offset * 8) & 24), 8); |
| 110 | } | 129 | } |
| 111 | 130 | ||
| 112 | // ASTC Encodings data | ||
| 113 | layout(binding = BINDING_ENC_BUFFER, std430) readonly buffer EncodingsValues { | ||
| 114 | EncodingData encoding_values[256]; | ||
| 115 | }; | ||
| 116 | // ASTC Precompiled tables | ||
| 117 | layout(binding = BINDING_6_TO_8_BUFFER, std430) readonly buffer REPLICATE_6_BIT_TO_8 { | ||
| 118 | uint REPLICATE_6_BIT_TO_8_TABLE[]; | ||
| 119 | }; | ||
| 120 | layout(binding = BINDING_7_TO_8_BUFFER, std430) readonly buffer REPLICATE_7_BIT_TO_8 { | ||
| 121 | uint REPLICATE_7_BIT_TO_8_TABLE[]; | ||
| 122 | }; | ||
| 123 | layout(binding = BINDING_8_TO_8_BUFFER, std430) readonly buffer REPLICATE_8_BIT_TO_8 { | ||
| 124 | uint REPLICATE_8_BIT_TO_8_TABLE[]; | ||
| 125 | }; | ||
| 126 | layout(binding = BINDING_BYTE_TO_16_BUFFER, std430) readonly buffer REPLICATE_BYTE_TO_16 { | ||
| 127 | uint REPLICATE_BYTE_TO_16_TABLE[]; | ||
| 128 | }; | ||
| 129 | 131 | ||
| 130 | const int BLOCK_SIZE_IN_BYTES = 16; | 132 | const int BLOCK_SIZE_IN_BYTES = 16; |
| 131 | 133 | ||
| @@ -1275,8 +1277,7 @@ void main() { | |||
| 1275 | offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; | 1277 | offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; |
| 1276 | offset += swizzle; | 1278 | offset += swizzle; |
| 1277 | 1279 | ||
| 1278 | const ivec3 invocation_destination = ivec3(gl_GlobalInvocationID + destination); | 1280 | const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1.0)); |
| 1279 | const ivec3 coord = ivec3(invocation_destination * uvec3(block_dims, 1.0)); | ||
| 1280 | uint block_index = | 1281 | uint block_index = |
| 1281 | layer * num_image_blocks.x * num_image_blocks.y + pos.y * num_image_blocks.x + pos.x; | 1282 | layer * num_image_blocks.x * num_image_blocks.y + pos.y * num_image_blocks.x + pos.x; |
| 1282 | current_index = 0; | 1283 | current_index = 0; |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 19aaf034f..f088447e9 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -166,7 +166,7 @@ struct FormatTuple { | |||
| 166 | {VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT | 166 | {VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT |
| 167 | {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM | 167 | {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM |
| 168 | {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT | 168 | {VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT |
| 169 | {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // A8B8G8R8_SRGB | 169 | {VK_FORMAT_A8B8G8R8_SRGB_PACK32, Attachable}, // A8B8G8R8_SRGB |
| 170 | {VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // R8G8_UNORM | 170 | {VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // R8G8_UNORM |
| 171 | {VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // R8G8_SNORM | 171 | {VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // R8G8_SNORM |
| 172 | {VK_FORMAT_R8G8_SINT, Attachable | Storage}, // R8G8_SINT | 172 | {VK_FORMAT_R8G8_SINT, Attachable | Storage}, // R8G8_SINT |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 2f9a7b028..7587ab1e0 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -11,18 +11,38 @@ | |||
| 11 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "common/div_ceil.h" | 13 | #include "common/div_ceil.h" |
| 14 | #include "video_core/host_shaders/astc_decoder_comp_spv.h" | ||
| 14 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" | 15 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" |
| 15 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" | 16 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" |
| 16 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 17 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| 17 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 18 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 18 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 19 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 19 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 20 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 21 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 20 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 22 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 23 | #include "video_core/texture_cache/accelerated_swizzle.h" | ||
| 24 | #include "video_core/texture_cache/types.h" | ||
| 25 | #include "video_core/textures/astc.h" | ||
| 26 | #include "video_core/textures/decoders.h" | ||
| 21 | #include "video_core/vulkan_common/vulkan_device.h" | 27 | #include "video_core/vulkan_common/vulkan_device.h" |
| 22 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 28 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 23 | 29 | ||
| 24 | namespace Vulkan { | 30 | namespace Vulkan { |
| 31 | |||
| 32 | using Tegra::Texture::SWIZZLE_TABLE; | ||
| 33 | using Tegra::Texture::ASTC::EncodingsValues; | ||
| 34 | |||
| 25 | namespace { | 35 | namespace { |
| 36 | |||
| 37 | constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 0; | ||
| 38 | constexpr u32 ASTC_BINDING_INPUT_BUFFER = 1; | ||
| 39 | constexpr u32 ASTC_BINDING_ENC_BUFFER = 2; | ||
| 40 | constexpr u32 ASTC_BINDING_6_TO_8_BUFFER = 3; | ||
| 41 | constexpr u32 ASTC_BINDING_7_TO_8_BUFFER = 4; | ||
| 42 | constexpr u32 ASTC_BINDING_8_TO_8_BUFFER = 5; | ||
| 43 | constexpr u32 ASTC_BINDING_BYTE_TO_16_BUFFER = 6; | ||
| 44 | constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 7; | ||
| 45 | |||
| 26 | VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { | 46 | VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { |
| 27 | return { | 47 | return { |
| 28 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | 48 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| @@ -50,6 +70,67 @@ std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBinding | |||
| 50 | }}; | 70 | }}; |
| 51 | } | 71 | } |
| 52 | 72 | ||
| 73 | std::array<VkDescriptorSetLayoutBinding, 8> BuildASTCDescriptorSetBindings() { | ||
| 74 | return {{ | ||
| 75 | { | ||
| 76 | .binding = ASTC_BINDING_SWIZZLE_BUFFER, // Swizzle buffer | ||
| 77 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 78 | .descriptorCount = 1, | ||
| 79 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 80 | .pImmutableSamplers = nullptr, | ||
| 81 | }, | ||
| 82 | { | ||
| 83 | .binding = ASTC_BINDING_INPUT_BUFFER, // ASTC Img data buffer | ||
| 84 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 85 | .descriptorCount = 1, | ||
| 86 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 87 | .pImmutableSamplers = nullptr, | ||
| 88 | }, | ||
| 89 | { | ||
| 90 | .binding = ASTC_BINDING_ENC_BUFFER, // Encodings buffer | ||
| 91 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 92 | .descriptorCount = 1, | ||
| 93 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 94 | .pImmutableSamplers = nullptr, | ||
| 95 | }, | ||
| 96 | { | ||
| 97 | .binding = ASTC_BINDING_6_TO_8_BUFFER, // BINDING_6_TO_8_BUFFER | ||
| 98 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 99 | .descriptorCount = 1, | ||
| 100 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 101 | .pImmutableSamplers = nullptr, | ||
| 102 | }, | ||
| 103 | { | ||
| 104 | .binding = ASTC_BINDING_7_TO_8_BUFFER, // BINDING_7_TO_8_BUFFER | ||
| 105 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 106 | .descriptorCount = 1, | ||
| 107 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 108 | .pImmutableSamplers = nullptr, | ||
| 109 | }, | ||
| 110 | { | ||
| 111 | .binding = ASTC_BINDING_8_TO_8_BUFFER, // BINDING_8_TO_8_BUFFER | ||
| 112 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 113 | .descriptorCount = 1, | ||
| 114 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 115 | .pImmutableSamplers = nullptr, | ||
| 116 | }, | ||
| 117 | { | ||
| 118 | .binding = ASTC_BINDING_BYTE_TO_16_BUFFER, // BINDING_BYTE_TO_16_BUFFER | ||
| 119 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 120 | .descriptorCount = 1, | ||
| 121 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 122 | .pImmutableSamplers = nullptr, | ||
| 123 | }, | ||
| 124 | { | ||
| 125 | .binding = ASTC_BINDING_OUTPUT_IMAGE, // Output image | ||
| 126 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, | ||
| 127 | .descriptorCount = 1, | ||
| 128 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 129 | .pImmutableSamplers = nullptr, | ||
| 130 | }, | ||
| 131 | }}; | ||
| 132 | } | ||
| 133 | |||
| 53 | VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { | 134 | VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { |
| 54 | return { | 135 | return { |
| 55 | .dstBinding = 0, | 136 | .dstBinding = 0, |
| @@ -61,6 +142,90 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { | |||
| 61 | }; | 142 | }; |
| 62 | } | 143 | } |
| 63 | 144 | ||
| 145 | std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateTemplateEntry() { | ||
| 146 | return {{ | ||
| 147 | { | ||
| 148 | .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER, | ||
| 149 | .dstArrayElement = 0, | ||
| 150 | .descriptorCount = 1, | ||
| 151 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 152 | .offset = 0 * sizeof(DescriptorUpdateEntry), | ||
| 153 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 154 | }, | ||
| 155 | { | ||
| 156 | .dstBinding = ASTC_BINDING_INPUT_BUFFER, | ||
| 157 | .dstArrayElement = 0, | ||
| 158 | .descriptorCount = 1, | ||
| 159 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 160 | .offset = 1 * sizeof(DescriptorUpdateEntry), | ||
| 161 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 162 | }, | ||
| 163 | { | ||
| 164 | .dstBinding = ASTC_BINDING_ENC_BUFFER, | ||
| 165 | .dstArrayElement = 0, | ||
| 166 | .descriptorCount = 1, | ||
| 167 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 168 | .offset = 2 * sizeof(DescriptorUpdateEntry), | ||
| 169 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 170 | }, | ||
| 171 | { | ||
| 172 | .dstBinding = ASTC_BINDING_6_TO_8_BUFFER, | ||
| 173 | .dstArrayElement = 0, | ||
| 174 | .descriptorCount = 1, | ||
| 175 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 176 | .offset = 3 * sizeof(DescriptorUpdateEntry), | ||
| 177 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 178 | }, | ||
| 179 | { | ||
| 180 | .dstBinding = ASTC_BINDING_7_TO_8_BUFFER, | ||
| 181 | .dstArrayElement = 0, | ||
| 182 | .descriptorCount = 1, | ||
| 183 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 184 | .offset = 4 * sizeof(DescriptorUpdateEntry), | ||
| 185 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 186 | }, | ||
| 187 | { | ||
| 188 | .dstBinding = ASTC_BINDING_8_TO_8_BUFFER, | ||
| 189 | .dstArrayElement = 0, | ||
| 190 | .descriptorCount = 1, | ||
| 191 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 192 | .offset = 5 * sizeof(DescriptorUpdateEntry), | ||
| 193 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 194 | }, | ||
| 195 | { | ||
| 196 | .dstBinding = ASTC_BINDING_BYTE_TO_16_BUFFER, | ||
| 197 | .dstArrayElement = 0, | ||
| 198 | .descriptorCount = 1, | ||
| 199 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 200 | .offset = 6 * sizeof(DescriptorUpdateEntry), | ||
| 201 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 202 | }, | ||
| 203 | { | ||
| 204 | .dstBinding = ASTC_BINDING_OUTPUT_IMAGE, | ||
| 205 | .dstArrayElement = 0, | ||
| 206 | .descriptorCount = 1, | ||
| 207 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, | ||
| 208 | .offset = 7 * sizeof(DescriptorUpdateEntry), | ||
| 209 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 210 | }, | ||
| 211 | }}; | ||
| 212 | } | ||
| 213 | |||
| 214 | struct AstcPushConstants { | ||
| 215 | std::array<u32, 2> num_image_blocks; | ||
| 216 | std::array<u32, 2> blocks_dims; | ||
| 217 | u32 layer; | ||
| 218 | VideoCommon::Accelerated::BlockLinearSwizzle2DParams params; | ||
| 219 | }; | ||
| 220 | |||
| 221 | struct AstcBufferData { | ||
| 222 | decltype(SWIZZLE_TABLE) swizzle_table_buffer = SWIZZLE_TABLE; | ||
| 223 | decltype(EncodingsValues) encoding_values = EncodingsValues; | ||
| 224 | decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE; | ||
| 225 | decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE; | ||
| 226 | decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE; | ||
| 227 | decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE; | ||
| 228 | } constexpr ASTC_BUFFER_DATA; | ||
| 64 | } // Anonymous namespace | 229 | } // Anonymous namespace |
| 65 | 230 | ||
| 66 | VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, | 231 | VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, |
| @@ -238,4 +403,137 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | |||
| 238 | return {staging.buffer, staging.offset}; | 403 | return {staging.buffer, staging.offset}; |
| 239 | } | 404 | } |
| 240 | 405 | ||
| 406 | using namespace Tegra::Texture::ASTC; | ||
| 407 | ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, | ||
| 408 | VKDescriptorPool& descriptor_pool_, | ||
| 409 | StagingBufferPool& staging_buffer_pool_, | ||
| 410 | VKUpdateDescriptorQueue& update_descriptor_queue_, | ||
| 411 | MemoryAllocator& memory_allocator_) | ||
| 412 | : VKComputePass(device_, descriptor_pool_, BuildASTCDescriptorSetBindings(), | ||
| 413 | BuildASTCPassDescriptorUpdateTemplateEntry(), | ||
| 414 | BuildComputePushConstantRange(sizeof(AstcPushConstants)), | ||
| 415 | ASTC_DECODER_COMP_SPV), | ||
| 416 | device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, | ||
| 417 | update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {} | ||
| 418 | |||
| 419 | ASTCDecoderPass::~ASTCDecoderPass() = default; | ||
| 420 | |||
| 421 | void ASTCDecoderPass::MakeDataBuffer() { | ||
| 422 | data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | ||
| 423 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 424 | .pNext = nullptr, | ||
| 425 | .flags = 0, | ||
| 426 | .size = sizeof(ASTC_BUFFER_DATA), | ||
| 427 | .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | ||
| 428 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 429 | .queueFamilyIndexCount = 0, | ||
| 430 | .pQueueFamilyIndices = nullptr, | ||
| 431 | }); | ||
| 432 | data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload); | ||
| 433 | |||
| 434 | const auto staging_ref = | ||
| 435 | staging_buffer_pool.Request(sizeof(ASTC_BUFFER_DATA), MemoryUsage::Upload); | ||
| 436 | std::memcpy(staging_ref.mapped_span.data(), &ASTC_BUFFER_DATA, sizeof(ASTC_BUFFER_DATA)); | ||
| 437 | scheduler.Record([src = staging_ref.buffer, dst = *data_buffer](vk::CommandBuffer cmdbuf) { | ||
| 438 | cmdbuf.CopyBuffer(src, dst, | ||
| 439 | VkBufferCopy{ | ||
| 440 | .srcOffset = 0, | ||
| 441 | .dstOffset = 0, | ||
| 442 | .size = sizeof(ASTC_BUFFER_DATA), | ||
| 443 | }); | ||
| 444 | cmdbuf.PipelineBarrier( | ||
| 445 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, | ||
| 446 | VkMemoryBarrier{ | ||
| 447 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 448 | .pNext = nullptr, | ||
| 449 | .srcAccessMask = 0, | ||
| 450 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 451 | }, | ||
| 452 | {}, {}); | ||
| 453 | }); | ||
| 454 | } | ||
| 455 | |||
| 456 | void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | ||
| 457 | std::span<const VideoCommon::SwizzleParameters> swizzles) { | ||
| 458 | using namespace VideoCommon::Accelerated; | ||
| 459 | const VideoCommon::Extent2D tile_size{ | ||
| 460 | .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), | ||
| 461 | .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), | ||
| 462 | }; | ||
| 463 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 464 | if (!data_buffer) { | ||
| 465 | MakeDataBuffer(); | ||
| 466 | } | ||
| 467 | const std::array<u32, 2> block_dims{tile_size.width, tile_size.height}; | ||
| 468 | for (s32 layer = 0; layer < image.info.resources.layers; layer++) { | ||
| 469 | for (const VideoCommon::SwizzleParameters& swizzle : swizzles) { | ||
| 470 | const size_t input_offset = swizzle.buffer_offset + map.offset; | ||
| 471 | const auto num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); | ||
| 472 | const auto num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); | ||
| 473 | const std::array num_image_blocks{swizzle.num_tiles.width, swizzle.num_tiles.height}; | ||
| 474 | const u32 layer_image_size = | ||
| 475 | image.guest_size_bytes - static_cast<u32>(swizzle.buffer_offset); | ||
| 476 | |||
| 477 | update_descriptor_queue.Acquire(); | ||
| 478 | update_descriptor_queue.AddBuffer(*data_buffer, | ||
| 479 | offsetof(AstcBufferData, swizzle_table_buffer), | ||
| 480 | sizeof(AstcBufferData::swizzle_table_buffer)); | ||
| 481 | update_descriptor_queue.AddBuffer(map.buffer, input_offset, image.guest_size_bytes); | ||
| 482 | update_descriptor_queue.AddBuffer(*data_buffer, | ||
| 483 | offsetof(AstcBufferData, encoding_values), | ||
| 484 | sizeof(AstcBufferData::encoding_values)); | ||
| 485 | update_descriptor_queue.AddBuffer(*data_buffer, | ||
| 486 | offsetof(AstcBufferData, replicate_6_to_8), | ||
| 487 | sizeof(AstcBufferData::replicate_6_to_8)); | ||
| 488 | update_descriptor_queue.AddBuffer(*data_buffer, | ||
| 489 | offsetof(AstcBufferData, replicate_7_to_8), | ||
| 490 | sizeof(AstcBufferData::replicate_7_to_8)); | ||
| 491 | update_descriptor_queue.AddBuffer(*data_buffer, | ||
| 492 | offsetof(AstcBufferData, replicate_8_to_8), | ||
| 493 | sizeof(AstcBufferData::replicate_8_to_8)); | ||
| 494 | update_descriptor_queue.AddBuffer(*data_buffer, | ||
| 495 | offsetof(AstcBufferData, replicate_byte_to_16), | ||
| 496 | sizeof(AstcBufferData::replicate_byte_to_16)); | ||
| 497 | update_descriptor_queue.AddImage(image.StorageImageView()); | ||
| 498 | |||
| 499 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | ||
| 500 | // To unswizzle the ASTC data | ||
| 501 | const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); | ||
| 502 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = map.buffer, | ||
| 503 | num_dispatches_x, num_dispatches_y, layer_image_size, | ||
| 504 | num_image_blocks, block_dims, layer, params, set, | ||
| 505 | image = image.Handle(), input_offset, | ||
| 506 | aspect_mask = image.AspectMask()](vk::CommandBuffer cmdbuf) { | ||
| 507 | const AstcPushConstants uniforms{num_image_blocks, block_dims, layer, params}; | ||
| 508 | |||
| 509 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | ||
| 510 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); | ||
| 511 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); | ||
| 512 | cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, 1); | ||
| 513 | |||
| 514 | const VkImageMemoryBarrier image_barrier{ | ||
| 515 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 516 | .pNext = nullptr, | ||
| 517 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||
| 518 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, | ||
| 519 | .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, | ||
| 520 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 521 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 522 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 523 | .image = image, | ||
| 524 | .subresourceRange{ | ||
| 525 | .aspectMask = aspect_mask, | ||
| 526 | .baseMipLevel = 0, | ||
| 527 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 528 | .baseArrayLayer = 0, | ||
| 529 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 530 | }, | ||
| 531 | }; | ||
| 532 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 533 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, image_barrier); | ||
| 534 | }); | ||
| 535 | } | ||
| 536 | } | ||
| 537 | } | ||
| 538 | |||
| 241 | } // namespace Vulkan | 539 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 17d781d99..5ea187c30 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h | |||
| @@ -11,14 +11,21 @@ | |||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 13 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 14 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||
| 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 15 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 15 | 16 | ||
| 17 | namespace VideoCommon { | ||
| 18 | struct SwizzleParameters; | ||
| 19 | } | ||
| 20 | |||
| 16 | namespace Vulkan { | 21 | namespace Vulkan { |
| 17 | 22 | ||
| 18 | class Device; | 23 | class Device; |
| 19 | class StagingBufferPool; | 24 | class StagingBufferPool; |
| 20 | class VKScheduler; | 25 | class VKScheduler; |
| 21 | class VKUpdateDescriptorQueue; | 26 | class VKUpdateDescriptorQueue; |
| 27 | class Image; | ||
| 28 | struct StagingBufferRef; | ||
| 22 | 29 | ||
| 23 | class VKComputePass { | 30 | class VKComputePass { |
| 24 | public: | 31 | public: |
| @@ -77,4 +84,29 @@ private: | |||
| 77 | VKUpdateDescriptorQueue& update_descriptor_queue; | 84 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 78 | }; | 85 | }; |
| 79 | 86 | ||
| 87 | class ASTCDecoderPass final : public VKComputePass { | ||
| 88 | public: | ||
| 89 | explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, | ||
| 90 | VKDescriptorPool& descriptor_pool_, | ||
| 91 | StagingBufferPool& staging_buffer_pool_, | ||
| 92 | VKUpdateDescriptorQueue& update_descriptor_queue_, | ||
| 93 | MemoryAllocator& memory_allocator_); | ||
| 94 | ~ASTCDecoderPass(); | ||
| 95 | |||
| 96 | void Assemble(Image& image, const StagingBufferRef& map, | ||
| 97 | std::span<const VideoCommon::SwizzleParameters> swizzles); | ||
| 98 | |||
| 99 | private: | ||
| 100 | void MakeDataBuffer(); | ||
| 101 | |||
| 102 | const Device& device; | ||
| 103 | VKScheduler& scheduler; | ||
| 104 | StagingBufferPool& staging_buffer_pool; | ||
| 105 | VKUpdateDescriptorQueue& update_descriptor_queue; | ||
| 106 | MemoryAllocator& memory_allocator; | ||
| 107 | |||
| 108 | vk::Buffer data_buffer; | ||
| 109 | MemoryCommit data_buffer_commit; | ||
| 110 | }; | ||
| 111 | |||
| 80 | } // namespace Vulkan | 112 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index dfd38f575..df5b7b172 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -241,7 +241,10 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 241 | staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), | 241 | staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), |
| 242 | update_descriptor_queue(device, scheduler), | 242 | update_descriptor_queue(device, scheduler), |
| 243 | blit_image(device, scheduler, state_tracker, descriptor_pool), | 243 | blit_image(device, scheduler, state_tracker, descriptor_pool), |
| 244 | texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image}, | 244 | astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue, |
| 245 | memory_allocator), | ||
| 246 | texture_cache_runtime{device, scheduler, memory_allocator, | ||
| 247 | staging_pool, blit_image, astc_decoder_pass}, | ||
| 245 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), | 248 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), |
| 246 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, | 249 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, |
| 247 | update_descriptor_queue, descriptor_pool), | 250 | update_descriptor_queue, descriptor_pool), |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index acea1ba2d..235afc6f3 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -173,6 +173,7 @@ private: | |||
| 173 | VKDescriptorPool descriptor_pool; | 173 | VKDescriptorPool descriptor_pool; |
| 174 | VKUpdateDescriptorQueue update_descriptor_queue; | 174 | VKUpdateDescriptorQueue update_descriptor_queue; |
| 175 | BlitImageHelper blit_image; | 175 | BlitImageHelper blit_image; |
| 176 | ASTCDecoderPass astc_decoder_pass; | ||
| 176 | 177 | ||
| 177 | GraphicsPipelineCacheKey graphics_key; | 178 | GraphicsPipelineCacheKey graphics_key; |
| 178 | 179 | ||
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 22a1014a9..f7f744587 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "video_core/engines/fermi_2d.h" | 10 | #include "video_core/engines/fermi_2d.h" |
| 11 | #include "video_core/renderer_vulkan/blit_image.h" | 11 | #include "video_core/renderer_vulkan/blit_image.h" |
| 12 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 12 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 13 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||
| 13 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 14 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 14 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 15 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 15 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 16 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| @@ -807,7 +808,7 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ | |||
| 807 | commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); | 808 | commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); |
| 808 | } | 809 | } |
| 809 | if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { | 810 | if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { |
| 810 | flags |= VideoCommon::ImageFlagBits::Converted; | 811 | flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; |
| 811 | } | 812 | } |
| 812 | if (runtime.device.HasDebuggingToolAttached()) { | 813 | if (runtime.device.HasDebuggingToolAttached()) { |
| 813 | if (image) { | 814 | if (image) { |
| @@ -816,6 +817,34 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ | |||
| 816 | buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); | 817 | buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); |
| 817 | } | 818 | } |
| 818 | } | 819 | } |
| 820 | static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{ | ||
| 821 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, | ||
| 822 | .pNext = nullptr, | ||
| 823 | .usage = VK_IMAGE_USAGE_STORAGE_BIT, | ||
| 824 | }; | ||
| 825 | if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { | ||
| 826 | storage_image_view = runtime.device.GetLogical().CreateImageView(VkImageViewCreateInfo{ | ||
| 827 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | ||
| 828 | .pNext = &storage_image_view_usage_create_info, | ||
| 829 | .flags = 0, | ||
| 830 | .image = *image, | ||
| 831 | .viewType = VK_IMAGE_VIEW_TYPE_2D, | ||
| 832 | .format = VK_FORMAT_A8B8G8R8_UNORM_PACK32, | ||
| 833 | .components{ | ||
| 834 | .r = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 835 | .g = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 836 | .b = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 837 | .a = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 838 | }, | ||
| 839 | .subresourceRange{ | ||
| 840 | .aspectMask = aspect_mask, | ||
| 841 | .baseMipLevel = 0, | ||
| 842 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 843 | .baseArrayLayer = 0, | ||
| 844 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 845 | }, | ||
| 846 | }); | ||
| 847 | } | ||
| 819 | } | 848 | } |
| 820 | 849 | ||
| 821 | void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { | 850 | void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { |
| @@ -918,7 +947,6 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI | |||
| 918 | } | 947 | } |
| 919 | } | 948 | } |
| 920 | const auto format_info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); | 949 | const auto format_info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); |
| 921 | const VkFormat vk_format = format_info.format; | ||
| 922 | const VkImageViewUsageCreateInfo image_view_usage{ | 950 | const VkImageViewUsageCreateInfo image_view_usage{ |
| 923 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, | 951 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, |
| 924 | .pNext = nullptr, | 952 | .pNext = nullptr, |
| @@ -930,7 +958,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI | |||
| 930 | .flags = 0, | 958 | .flags = 0, |
| 931 | .image = image.Handle(), | 959 | .image = image.Handle(), |
| 932 | .viewType = VkImageViewType{}, | 960 | .viewType = VkImageViewType{}, |
| 933 | .format = vk_format, | 961 | .format = format_info.format, |
| 934 | .components{ | 962 | .components{ |
| 935 | .r = ComponentSwizzle(swizzle[0]), | 963 | .r = ComponentSwizzle(swizzle[0]), |
| 936 | .g = ComponentSwizzle(swizzle[1]), | 964 | .g = ComponentSwizzle(swizzle[1]), |
| @@ -982,7 +1010,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI | |||
| 982 | .pNext = nullptr, | 1010 | .pNext = nullptr, |
| 983 | .flags = 0, | 1011 | .flags = 0, |
| 984 | .buffer = image.Buffer(), | 1012 | .buffer = image.Buffer(), |
| 985 | .format = vk_format, | 1013 | .format = format_info.format, |
| 986 | .offset = 0, // TODO: Redesign buffer cache to support this | 1014 | .offset = 0, // TODO: Redesign buffer cache to support this |
| 987 | .range = image.guest_size_bytes, | 1015 | .range = image.guest_size_bytes, |
| 988 | }); | 1016 | }); |
| @@ -1167,4 +1195,13 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM | |||
| 1167 | } | 1195 | } |
| 1168 | } | 1196 | } |
| 1169 | 1197 | ||
| 1198 | void TextureCacheRuntime::AccelerateImageUpload( | ||
| 1199 | Image& image, const StagingBufferRef& map, | ||
| 1200 | std::span<const VideoCommon::SwizzleParameters> swizzles) { | ||
| 1201 | if (IsPixelFormatASTC(image.info.format)) { | ||
| 1202 | return astc_decoder_pass.Assemble(image, map, swizzles); | ||
| 1203 | } | ||
| 1204 | UNREACHABLE(); | ||
| 1205 | } | ||
| 1206 | |||
| 1170 | } // namespace Vulkan | 1207 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 3aee27ce0..51705eccb 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -20,6 +20,7 @@ using VideoCommon::Offset2D; | |||
| 20 | using VideoCommon::RenderTargets; | 20 | using VideoCommon::RenderTargets; |
| 21 | using VideoCore::Surface::PixelFormat; | 21 | using VideoCore::Surface::PixelFormat; |
| 22 | 22 | ||
| 23 | class ASTCDecoderPass; | ||
| 23 | class BlitImageHelper; | 24 | class BlitImageHelper; |
| 24 | class Device; | 25 | class Device; |
| 25 | class Image; | 26 | class Image; |
| @@ -60,6 +61,7 @@ struct TextureCacheRuntime { | |||
| 60 | MemoryAllocator& memory_allocator; | 61 | MemoryAllocator& memory_allocator; |
| 61 | StagingBufferPool& staging_buffer_pool; | 62 | StagingBufferPool& staging_buffer_pool; |
| 62 | BlitImageHelper& blit_image_helper; | 63 | BlitImageHelper& blit_image_helper; |
| 64 | ASTCDecoderPass& astc_decoder_pass; | ||
| 63 | std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache{}; | 65 | std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache{}; |
| 64 | 66 | ||
| 65 | void Finish(); | 67 | void Finish(); |
| @@ -83,9 +85,7 @@ struct TextureCacheRuntime { | |||
| 83 | } | 85 | } |
| 84 | 86 | ||
| 85 | void AccelerateImageUpload(Image&, const StagingBufferRef&, | 87 | void AccelerateImageUpload(Image&, const StagingBufferRef&, |
| 86 | std::span<const VideoCommon::SwizzleParameters>) { | 88 | std::span<const VideoCommon::SwizzleParameters>); |
| 87 | UNREACHABLE(); | ||
| 88 | } | ||
| 89 | 89 | ||
| 90 | void InsertUploadMemoryBarrier() {} | 90 | void InsertUploadMemoryBarrier() {} |
| 91 | 91 | ||
| @@ -125,11 +125,17 @@ public: | |||
| 125 | return aspect_mask; | 125 | return aspect_mask; |
| 126 | } | 126 | } |
| 127 | 127 | ||
| 128 | [[nodiscard]] VkImageView StorageImageView() const noexcept { | ||
| 129 | return *storage_image_view; | ||
| 130 | } | ||
| 131 | |||
| 128 | private: | 132 | private: |
| 129 | VKScheduler* scheduler; | 133 | VKScheduler* scheduler; |
| 130 | vk::Image image; | 134 | vk::Image image; |
| 131 | vk::Buffer buffer; | 135 | vk::Buffer buffer; |
| 132 | MemoryCommit commit; | 136 | MemoryCommit commit; |
| 137 | vk::ImageView image_view; | ||
| 138 | vk::ImageView storage_image_view; | ||
| 133 | VkImageAspectFlags aspect_mask = 0; | 139 | VkImageAspectFlags aspect_mask = 0; |
| 134 | bool initialized = false; | 140 | bool initialized = false; |
| 135 | }; | 141 | }; |
diff --git a/src/video_core/texture_cache/accelerated_swizzle.h b/src/video_core/texture_cache/accelerated_swizzle.h index 6ec5c78c4..a11c924e1 100644 --- a/src/video_core/texture_cache/accelerated_swizzle.h +++ b/src/video_core/texture_cache/accelerated_swizzle.h | |||
| @@ -13,8 +13,8 @@ | |||
| 13 | namespace VideoCommon::Accelerated { | 13 | namespace VideoCommon::Accelerated { |
| 14 | 14 | ||
| 15 | struct BlockLinearSwizzle2DParams { | 15 | struct BlockLinearSwizzle2DParams { |
| 16 | std::array<u32, 3> origin; | 16 | alignas(16) std::array<u32, 3> origin; |
| 17 | std::array<s32, 3> destination; | 17 | alignas(16) std::array<s32, 3> destination; |
| 18 | u32 bytes_per_block_log2; | 18 | u32 bytes_per_block_log2; |
| 19 | u32 layer_stride; | 19 | u32 layer_stride; |
| 20 | u32 block_size; | 20 | u32 block_size; |
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 62685a183..3a463d5db 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -17,26 +17,7 @@ | |||
| 17 | #include "video_core/textures/texture.h" | 17 | #include "video_core/textures/texture.h" |
| 18 | 18 | ||
| 19 | namespace Tegra::Texture { | 19 | namespace Tegra::Texture { |
| 20 | |||
| 21 | namespace { | 20 | namespace { |
| 22 | /** | ||
| 23 | * This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing. | ||
| 24 | * Calculates the offset of an (x, y) position within a swizzled texture. | ||
| 25 | * Taken from the Tegra X1 Technical Reference Manual. pages 1187-1188 | ||
| 26 | */ | ||
| 27 | constexpr SwizzleTable MakeSwizzleTableConst() { | ||
| 28 | SwizzleTable table{}; | ||
| 29 | for (u32 y = 0; y < table.size(); ++y) { | ||
| 30 | for (u32 x = 0; x < table[0].size(); ++x) { | ||
| 31 | table[y][x] = ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 + | ||
| 32 | (y % 2) * 16 + (x % 16); | ||
| 33 | } | ||
| 34 | } | ||
| 35 | return table; | ||
| 36 | } | ||
| 37 | |||
| 38 | constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTableConst(); | ||
| 39 | |||
| 40 | template <bool TO_LINEAR> | 21 | template <bool TO_LINEAR> |
| 41 | void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, | 22 | void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, |
| 42 | u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { | 23 | u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { |
| @@ -91,10 +72,6 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe | |||
| 91 | } | 72 | } |
| 92 | } // Anonymous namespace | 73 | } // Anonymous namespace |
| 93 | 74 | ||
| 94 | SwizzleTable MakeSwizzleTable() { | ||
| 95 | return SWIZZLE_TABLE; | ||
| 96 | } | ||
| 97 | |||
| 98 | void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, | 75 | void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, |
| 99 | u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, | 76 | u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, |
| 100 | u32 stride_alignment) { | 77 | u32 stride_alignment) { |
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index d7cdc81e8..4c14cefbf 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -23,8 +23,22 @@ constexpr u32 GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_ | |||
| 23 | 23 | ||
| 24 | using SwizzleTable = std::array<std::array<u32, GOB_SIZE_X>, GOB_SIZE_Y>; | 24 | using SwizzleTable = std::array<std::array<u32, GOB_SIZE_X>, GOB_SIZE_Y>; |
| 25 | 25 | ||
| 26 | /// Returns a z-order swizzle table | 26 | /** |
| 27 | SwizzleTable MakeSwizzleTable(); | 27 | * This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing. |
| 28 | * Calculates the offset of an (x, y) position within a swizzled texture. | ||
| 29 | * Taken from the Tegra X1 Technical Reference Manual. pages 1187-1188 | ||
| 30 | */ | ||
| 31 | constexpr SwizzleTable MakeSwizzleTable() { | ||
| 32 | SwizzleTable table{}; | ||
| 33 | for (u32 y = 0; y < table.size(); ++y) { | ||
| 34 | for (u32 x = 0; x < table[0].size(); ++x) { | ||
| 35 | table[y][x] = ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 + | ||
| 36 | (y % 2) * 16 + (x % 16); | ||
| 37 | } | ||
| 38 | } | ||
| 39 | return table; | ||
| 40 | } | ||
| 41 | constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTable(); | ||
| 28 | 42 | ||
| 29 | /// Unswizzles a block linear texture into linear memory. | 43 | /// Unswizzles a block linear texture into linear memory. |
| 30 | void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, | 44 | void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, |