diff options
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 15 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/util_shaders.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.cpp | 39 | ||||
| -rw-r--r-- | src/video_core/textures/astc.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/textures/astc.h | 2 |
5 files changed, 16 insertions, 47 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index eaba1b103..71327e233 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp | |||
| @@ -14,9 +14,8 @@ | |||
| 14 | #define BINDING_6_TO_8_BUFFER 2 | 14 | #define BINDING_6_TO_8_BUFFER 2 |
| 15 | #define BINDING_7_TO_8_BUFFER 3 | 15 | #define BINDING_7_TO_8_BUFFER 3 |
| 16 | #define BINDING_8_TO_8_BUFFER 4 | 16 | #define BINDING_8_TO_8_BUFFER 4 |
| 17 | #define BINDING_BYTE_TO_16_BUFFER 5 | 17 | #define BINDING_SWIZZLE_BUFFER 5 |
| 18 | #define BINDING_SWIZZLE_BUFFER 6 | 18 | #define BINDING_OUTPUT_IMAGE 6 |
| 19 | #define BINDING_OUTPUT_IMAGE 7 | ||
| 20 | 19 | ||
| 21 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv | 20 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv |
| 22 | 21 | ||
| @@ -29,7 +28,6 @@ | |||
| 29 | #define BINDING_6_TO_8_BUFFER 3 | 28 | #define BINDING_6_TO_8_BUFFER 3 |
| 30 | #define BINDING_7_TO_8_BUFFER 4 | 29 | #define BINDING_7_TO_8_BUFFER 4 |
| 31 | #define BINDING_8_TO_8_BUFFER 5 | 30 | #define BINDING_8_TO_8_BUFFER 5 |
| 32 | #define BINDING_BYTE_TO_16_BUFFER 6 | ||
| 33 | #define BINDING_OUTPUT_IMAGE 0 | 31 | #define BINDING_OUTPUT_IMAGE 0 |
| 34 | 32 | ||
| 35 | #endif | 33 | #endif |
| @@ -86,9 +84,6 @@ layout(binding = BINDING_7_TO_8_BUFFER, std430) readonly buffer REPLICATE_7_BIT_ | |||
| 86 | layout(binding = BINDING_8_TO_8_BUFFER, std430) readonly buffer REPLICATE_8_BIT_TO_8 { | 84 | layout(binding = BINDING_8_TO_8_BUFFER, std430) readonly buffer REPLICATE_8_BIT_TO_8 { |
| 87 | uint REPLICATE_8_BIT_TO_8_TABLE[]; | 85 | uint REPLICATE_8_BIT_TO_8_TABLE[]; |
| 88 | }; | 86 | }; |
| 89 | layout(binding = BINDING_BYTE_TO_16_BUFFER, std430) readonly buffer REPLICATE_BYTE_TO_16 { | ||
| 90 | uint REPLICATE_BYTE_TO_16_TABLE[]; | ||
| 91 | }; | ||
| 92 | 87 | ||
| 93 | layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; | 88 | layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; |
| 94 | 89 | ||
| @@ -207,8 +202,7 @@ uint Replicate(uint val, uint num_bits, uint to_bit) { | |||
| 207 | } | 202 | } |
| 208 | 203 | ||
| 209 | uvec4 ReplicateByteTo16(uvec4 value) { | 204 | uvec4 ReplicateByteTo16(uvec4 value) { |
| 210 | return uvec4(REPLICATE_BYTE_TO_16_TABLE[value.x], REPLICATE_BYTE_TO_16_TABLE[value.y], | 205 | return value * 0x101; |
| 211 | REPLICATE_BYTE_TO_16_TABLE[value.z], REPLICATE_BYTE_TO_16_TABLE[value.w]); | ||
| 212 | } | 206 | } |
| 213 | 207 | ||
| 214 | uint ReplicateBitTo7(uint value) { | 208 | uint ReplicateBitTo7(uint value) { |
| @@ -1327,6 +1321,9 @@ void main() { | |||
| 1327 | offset += swizzle; | 1321 | offset += swizzle; |
| 1328 | 1322 | ||
| 1329 | const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1)); | 1323 | const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1)); |
| 1324 | if (any(greaterThanEqual(coord, imageSize(dest_image)))) { | ||
| 1325 | return; | ||
| 1326 | } | ||
| 1330 | uint block_index = | 1327 | uint block_index = |
| 1331 | pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x; | 1328 | pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x; |
| 1332 | 1329 | ||
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 47fddcb6e..d57998cdc 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp | |||
| @@ -83,7 +83,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, | |||
| 83 | static constexpr GLuint BINDING_6_TO_8_BUFFER = 3; | 83 | static constexpr GLuint BINDING_6_TO_8_BUFFER = 3; |
| 84 | static constexpr GLuint BINDING_7_TO_8_BUFFER = 4; | 84 | static constexpr GLuint BINDING_7_TO_8_BUFFER = 4; |
| 85 | static constexpr GLuint BINDING_8_TO_8_BUFFER = 5; | 85 | static constexpr GLuint BINDING_8_TO_8_BUFFER = 5; |
| 86 | static constexpr GLuint BINDING_BYTE_TO_16_BUFFER = 6; | ||
| 87 | 86 | ||
| 88 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | 87 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; |
| 89 | 88 | ||
| @@ -105,9 +104,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, | |||
| 105 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_8_TO_8_BUFFER, astc_buffer.handle, | 104 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_8_TO_8_BUFFER, astc_buffer.handle, |
| 106 | offsetof(AstcBufferData, replicate_8_to_8), | 105 | offsetof(AstcBufferData, replicate_8_to_8), |
| 107 | sizeof(AstcBufferData::replicate_8_to_8)); | 106 | sizeof(AstcBufferData::replicate_8_to_8)); |
| 108 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_BYTE_TO_16_BUFFER, astc_buffer.handle, | ||
| 109 | offsetof(AstcBufferData, replicate_byte_to_16), | ||
| 110 | sizeof(AstcBufferData::replicate_byte_to_16)); | ||
| 111 | 107 | ||
| 112 | glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); | 108 | glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); |
| 113 | glUniform2ui(1, tile_size.width, tile_size.height); | 109 | glUniform2ui(1, tile_size.width, tile_size.height); |
| @@ -137,6 +133,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, | |||
| 137 | 133 | ||
| 138 | glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); | 134 | glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); |
| 139 | } | 135 | } |
| 136 | glMemoryBarrier(GL_ALL_BARRIER_BITS); | ||
| 140 | program_manager.RestoreGuestCompute(); | 137 | program_manager.RestoreGuestCompute(); |
| 141 | } | 138 | } |
| 142 | 139 | ||
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index e11406e58..123bed794 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -40,9 +40,9 @@ constexpr u32 ASTC_BINDING_ENC_BUFFER = 1; | |||
| 40 | constexpr u32 ASTC_BINDING_6_TO_8_BUFFER = 2; | 40 | constexpr u32 ASTC_BINDING_6_TO_8_BUFFER = 2; |
| 41 | constexpr u32 ASTC_BINDING_7_TO_8_BUFFER = 3; | 41 | constexpr u32 ASTC_BINDING_7_TO_8_BUFFER = 3; |
| 42 | constexpr u32 ASTC_BINDING_8_TO_8_BUFFER = 4; | 42 | constexpr u32 ASTC_BINDING_8_TO_8_BUFFER = 4; |
| 43 | constexpr u32 ASTC_BINDING_BYTE_TO_16_BUFFER = 5; | 43 | constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 5; |
| 44 | constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 6; | 44 | constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 6; |
| 45 | constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 7; | 45 | constexpr size_t ASTC_NUM_BINDINGS = 7; |
| 46 | 46 | ||
| 47 | VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { | 47 | VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { |
| 48 | return { | 48 | return { |
| @@ -71,7 +71,7 @@ std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBinding | |||
| 71 | }}; | 71 | }}; |
| 72 | } | 72 | } |
| 73 | 73 | ||
| 74 | std::array<VkDescriptorSetLayoutBinding, 8> BuildASTCDescriptorSetBindings() { | 74 | std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> BuildASTCDescriptorSetBindings() { |
| 75 | return {{ | 75 | return {{ |
| 76 | { | 76 | { |
| 77 | .binding = ASTC_BINDING_INPUT_BUFFER, | 77 | .binding = ASTC_BINDING_INPUT_BUFFER, |
| @@ -109,13 +109,6 @@ std::array<VkDescriptorSetLayoutBinding, 8> BuildASTCDescriptorSetBindings() { | |||
| 109 | .pImmutableSamplers = nullptr, | 109 | .pImmutableSamplers = nullptr, |
| 110 | }, | 110 | }, |
| 111 | { | 111 | { |
| 112 | .binding = ASTC_BINDING_BYTE_TO_16_BUFFER, | ||
| 113 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 114 | .descriptorCount = 1, | ||
| 115 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 116 | .pImmutableSamplers = nullptr, | ||
| 117 | }, | ||
| 118 | { | ||
| 119 | .binding = ASTC_BINDING_SWIZZLE_BUFFER, | 112 | .binding = ASTC_BINDING_SWIZZLE_BUFFER, |
| 120 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 113 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| 121 | .descriptorCount = 1, | 114 | .descriptorCount = 1, |
| @@ -143,7 +136,8 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { | |||
| 143 | }; | 136 | }; |
| 144 | } | 137 | } |
| 145 | 138 | ||
| 146 | std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateTemplateEntry() { | 139 | std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS> |
| 140 | BuildASTCPassDescriptorUpdateTemplateEntry() { | ||
| 147 | return {{ | 141 | return {{ |
| 148 | { | 142 | { |
| 149 | .dstBinding = ASTC_BINDING_INPUT_BUFFER, | 143 | .dstBinding = ASTC_BINDING_INPUT_BUFFER, |
| @@ -186,14 +180,6 @@ std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateT | |||
| 186 | .stride = sizeof(DescriptorUpdateEntry), | 180 | .stride = sizeof(DescriptorUpdateEntry), |
| 187 | }, | 181 | }, |
| 188 | { | 182 | { |
| 189 | .dstBinding = ASTC_BINDING_BYTE_TO_16_BUFFER, | ||
| 190 | .dstArrayElement = 0, | ||
| 191 | .descriptorCount = 1, | ||
| 192 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 193 | .offset = ASTC_BINDING_BYTE_TO_16_BUFFER * sizeof(DescriptorUpdateEntry), | ||
| 194 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 195 | }, | ||
| 196 | { | ||
| 197 | .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER, | 183 | .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER, |
| 198 | .dstArrayElement = 0, | 184 | .dstArrayElement = 0, |
| 199 | .descriptorCount = 1, | 185 | .descriptorCount = 1, |
| @@ -222,15 +208,6 @@ struct AstcPushConstants { | |||
| 222 | u32 block_height_mask; | 208 | u32 block_height_mask; |
| 223 | }; | 209 | }; |
| 224 | 210 | ||
| 225 | struct AstcBufferData { | ||
| 226 | decltype(SWIZZLE_TABLE) swizzle_table_buffer = SWIZZLE_TABLE; | ||
| 227 | decltype(EncodingsValues) encoding_values = EncodingsValues; | ||
| 228 | decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE; | ||
| 229 | decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE; | ||
| 230 | decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE; | ||
| 231 | decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE; | ||
| 232 | } constexpr ASTC_BUFFER_DATA; | ||
| 233 | |||
| 234 | } // Anonymous namespace | 211 | } // Anonymous namespace |
| 235 | 212 | ||
| 236 | VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, | 213 | VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, |
| @@ -517,9 +494,6 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | |||
| 517 | sizeof(AstcBufferData::replicate_7_to_8)); | 494 | sizeof(AstcBufferData::replicate_7_to_8)); |
| 518 | update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_8_to_8), | 495 | update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_8_to_8), |
| 519 | sizeof(AstcBufferData::replicate_8_to_8)); | 496 | sizeof(AstcBufferData::replicate_8_to_8)); |
| 520 | update_descriptor_queue.AddBuffer(*data_buffer, | ||
| 521 | offsetof(AstcBufferData, replicate_byte_to_16), | ||
| 522 | sizeof(AstcBufferData::replicate_byte_to_16)); | ||
| 523 | update_descriptor_queue.AddBuffer(*data_buffer, sizeof(AstcBufferData), | 497 | update_descriptor_queue.AddBuffer(*data_buffer, sizeof(AstcBufferData), |
| 524 | sizeof(SWIZZLE_TABLE)); | 498 | sizeof(SWIZZLE_TABLE)); |
| 525 | update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); | 499 | update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); |
| @@ -569,6 +543,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | |||
| 569 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | 543 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, |
| 570 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier); | 544 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier); |
| 571 | }); | 545 | }); |
| 546 | scheduler.Finish(); | ||
| 572 | } | 547 | } |
| 573 | 548 | ||
| 574 | } // namespace Vulkan | 549 | } // namespace Vulkan |
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 9b2177ebd..b6e2022f2 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp | |||
| @@ -551,6 +551,8 @@ static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) { | |||
| 551 | } | 551 | } |
| 552 | } | 552 | } |
| 553 | } | 553 | } |
| 554 | |||
| 555 | static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>(); | ||
| 554 | static constexpr u32 ReplicateByteTo16(std::size_t value) { | 556 | static constexpr u32 ReplicateByteTo16(std::size_t value) { |
| 555 | return REPLICATE_BYTE_TO_16_TABLE[value]; | 557 | return REPLICATE_BYTE_TO_16_TABLE[value]; |
| 556 | } | 558 | } |
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index c1c37dfe7..441e8eb04 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h | |||
| @@ -116,7 +116,6 @@ constexpr auto MakeReplicateTable() { | |||
| 116 | return table; | 116 | return table; |
| 117 | } | 117 | } |
| 118 | 118 | ||
| 119 | constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>(); | ||
| 120 | constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>(); | 119 | constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>(); |
| 121 | constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>(); | 120 | constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>(); |
| 122 | constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>(); | 121 | constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>(); |
| @@ -126,7 +125,6 @@ struct AstcBufferData { | |||
| 126 | decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE; | 125 | decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE; |
| 127 | decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE; | 126 | decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE; |
| 128 | decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE; | 127 | decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE; |
| 129 | decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE; | ||
| 130 | } constexpr ASTC_BUFFER_DATA; | 128 | } constexpr ASTC_BUFFER_DATA; |
| 131 | 129 | ||
| 132 | void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, | 130 | void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, |