Diffstat (limited to 'src')
-rw-r--r--   src/video_core/buffer_cache/buffer_cache.h                 |  28
-rw-r--r--   src/video_core/renderer_opengl/gl_texture_cache.cpp        |  21
-rw-r--r--   src/video_core/renderer_opengl/gl_texture_cache.h          |  11
-rw-r--r--   src/video_core/renderer_opengl/util_shaders.cpp            |  18
-rw-r--r--   src/video_core/renderer_opengl/util_shaders.h              |   6
-rw-r--r--   src/video_core/renderer_vulkan/vk_buffer_cache.cpp         |  23
-rw-r--r--   src/video_core/renderer_vulkan/vk_compute_pass.cpp         |  61
-rw-r--r--   src/video_core/renderer_vulkan/vk_compute_pass.h           |   9
-rw-r--r--   src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp  | 142
-rw-r--r--   src/video_core/renderer_vulkan/vk_staging_buffer_pool.h    |  20
-rw-r--r--   src/video_core/renderer_vulkan/vk_texture_cache.cpp        |  14
-rw-r--r--   src/video_core/renderer_vulkan/vk_texture_cache.h          |   9
-rw-r--r--   src/video_core/texture_cache/texture_cache.h               |  38
-rw-r--r--   src/video_core/vulkan_common/vulkan_wrapper.cpp            |  20
-rw-r--r--   src/video_core/vulkan_common/vulkan_wrapper.h              |   5
15 files changed, 298 insertions, 127 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index e4f3c8e35..d6399bf24 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -239,8 +239,7 @@ private:
     void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
                                std::span<const BufferCopy> copies);

-    void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
-                            std::span<const BufferCopy> copies);
+    void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);

     void DeleteBuffer(BufferId buffer_id);

@@ -362,11 +361,17 @@ void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
         auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
         const u8* const mapped_memory = download_staging.mapped_span.data();
         const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
+        for (BufferCopy& copy : copies) {
+            // Modify copies to have the staging offset in mind
+            copy.dst_offset += download_staging.offset;
+        }
         runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
         runtime.Finish();
         for (const BufferCopy& copy : copies) {
             const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
-            const u8* copy_mapped_memory = mapped_memory + copy.dst_offset;
+            // Undo the modified offset
+            const u64 dst_offset = copy.dst_offset - download_staging.offset;
+            const u8* copy_mapped_memory = mapped_memory + dst_offset;
             cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
         }
     } else {
@@ -554,7 +559,9 @@ void BufferCache<P>::PopAsyncFlushes() {
     }
     if constexpr (USE_MEMORY_MAPS) {
         auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
-        for (const auto [copy, buffer_id] : downloads) {
+        for (auto& [copy, buffer_id] : downloads) {
+            // Have in mind the staging buffer offset for the copy
+            copy.dst_offset += download_staging.offset;
             const std::array copies{copy};
             runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies);
         }
@@ -562,7 +569,9 @@ void BufferCache<P>::PopAsyncFlushes() {
         for (const auto [copy, buffer_id] : downloads) {
             const Buffer& buffer = slot_buffers[buffer_id];
             const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
-            const u8* read_mapped_memory = download_staging.mapped_span.data() + copy.dst_offset;
+            // Undo the modified offset
+            const u64 dst_offset = copy.dst_offset - download_staging.offset;
+            const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
             cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size);
         }
     } else {
@@ -1117,13 +1126,16 @@ void BufferCache<P>::ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,

 template <class P>
 void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
-                                        std::span<const BufferCopy> copies) {
+                                        std::span<BufferCopy> copies) {
     auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
     const std::span<u8> staging_pointer = upload_staging.mapped_span;
-    for (const BufferCopy& copy : copies) {
-        const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
+    for (BufferCopy& copy : copies) {
         u8* const src_pointer = staging_pointer.data() + copy.src_offset;
+        const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
         cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size);
+
+        // Apply the staging offset
+        copy.src_offset += upload_staging.offset;
     }
     runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
 }
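Note on the buffer_cache.h hunks above: staging allocations can now start at a nonzero offset inside a shared stream buffer, so the copy descriptors are rebased onto download_staging.offset (or upload_staging.offset) before the GPU copy is recorded, and the offset is subtracted again when the CPU indexes the mapped span. Below is a minimal, self-contained C++ sketch of that bookkeeping; Copy and StagingRef are illustrative stand-ins, not yuzu's BufferCopy/StagingBufferRef.

// Sketch of the apply-then-undo staging offset pattern used above.
#include <cstdint>
#include <cstdio>
#include <vector>

struct Copy {
    uint64_t src_offset;
    uint64_t dst_offset;
    uint64_t size;
};

struct StagingRef {
    uint64_t offset; // where this request starts inside the shared staging buffer
};

int main() {
    StagingRef staging{4096};
    std::vector<Copy> copies{Copy{0, 0, 256}};

    // Before recording the GPU copy: make destinations absolute within the
    // shared staging buffer.
    for (Copy& copy : copies) {
        copy.dst_offset += staging.offset;
    }
    // ... GPU copies buffer -> staging would be recorded here ...

    // After the GPU finished: undo the offset, because the mapped span already
    // starts at staging.offset rather than at the start of the buffer.
    for (const Copy& copy : copies) {
        const uint64_t span_offset = copy.dst_offset - staging.offset;
        std::printf("read %llu bytes at mapped_span + %llu\n",
                    static_cast<unsigned long long>(copy.size),
                    static_cast<unsigned long long>(span_offset));
    }
}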
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 37572ab28..31eb54123 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -550,15 +550,14 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
 }

 void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map,
-                                                size_t buffer_offset,
                                                 std::span<const SwizzleParameters> swizzles) {
     switch (image.info.type) {
     case ImageType::e2D:
-        return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles);
+        return util_shaders.BlockLinearUpload2D(image, map, swizzles);
     case ImageType::e3D:
-        return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles);
+        return util_shaders.BlockLinearUpload3D(image, map, swizzles);
     case ImageType::Linear:
-        return util_shaders.PitchUpload(image, map, buffer_offset, swizzles);
+        return util_shaders.PitchUpload(image, map, swizzles);
     default:
         UNREACHABLE();
         break;
@@ -710,10 +709,10 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
     }
 }

-void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+void Image::UploadMemory(const ImageBufferMap& map,
                          std::span<const VideoCommon::BufferImageCopy> copies) {
     glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
-    glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes);
+    glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes);

     glPixelStorei(GL_UNPACK_ALIGNMENT, 1);

@@ -729,19 +728,19 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
             current_image_height = copy.buffer_image_height;
             glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height);
         }
-        CopyBufferToImage(copy, buffer_offset);
+        CopyBufferToImage(copy, map.offset);
     }
 }

-void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+void Image::UploadMemory(const ImageBufferMap& map,
                          std::span<const VideoCommon::BufferCopy> copies) {
     for (const VideoCommon::BufferCopy& copy : copies) {
-        glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + buffer_offset,
+        glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset,
                                  copy.dst_offset, copy.size);
     }
 }

-void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
+void Image::DownloadMemory(ImageBufferMap& map,
                            std::span<const VideoCommon::BufferImageCopy> copies) {
     glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API

@@ -760,7 +759,7 @@ void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
             current_image_height = copy.buffer_image_height;
             glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
         }
-        CopyImageToBuffer(copy, buffer_offset);
+        CopyImageToBuffer(copy, map.offset);
     }
 }

diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 60d08d6d6..874cf54f4 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -35,6 +35,7 @@ struct ImageBufferMap {
     ~ImageBufferMap();

     std::span<u8> mapped_span;
+    size_t offset = 0;
     OGLSync* sync;
     GLuint buffer;
 };
@@ -78,7 +79,7 @@ public:
                          Tegra::Engines::Fermi2D::Filter filter,
                          Tegra::Engines::Fermi2D::Operation operation);

-    void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+    void AccelerateImageUpload(Image& image, const ImageBufferMap& map,
                                std::span<const VideoCommon::SwizzleParameters> swizzles);

     void InsertUploadMemoryBarrier();
@@ -137,14 +138,12 @@ public:
     explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
                    VAddr cpu_addr);

-    void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
+    void UploadMemory(const ImageBufferMap& map,
                       std::span<const VideoCommon::BufferImageCopy> copies);

-    void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
-                      std::span<const VideoCommon::BufferCopy> copies);
+    void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies);

-    void DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
-                        std::span<const VideoCommon::BufferImageCopy> copies);
+    void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);

     GLuint Handle() const noexcept {
         return texture.handle;
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index aeb36551c..1b58e8617 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -63,7 +63,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)

 UtilShaders::~UtilShaders() = default;

-void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
                                       std::span<const SwizzleParameters> swizzles) {
     static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
     static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
@@ -71,13 +71,13 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
     static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;

     program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
-    glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes);
+    glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
     glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);

     const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
     for (const SwizzleParameters& swizzle : swizzles) {
         const Extent3D num_tiles = swizzle.num_tiles;
-        const size_t input_offset = swizzle.buffer_offset + buffer_offset;
+        const size_t input_offset = swizzle.buffer_offset + map.offset;

         const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
         const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
@@ -100,7 +100,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s
     program_manager.RestoreGuestCompute();
 }

-void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
                                       std::span<const SwizzleParameters> swizzles) {
     static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};

@@ -108,14 +108,14 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
     static constexpr GLuint BINDING_INPUT_BUFFER = 1;
     static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;

-    glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes);
+    glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
     program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
     glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);

     const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
     for (const SwizzleParameters& swizzle : swizzles) {
         const Extent3D num_tiles = swizzle.num_tiles;
-        const size_t input_offset = swizzle.buffer_offset + buffer_offset;
+        const size_t input_offset = swizzle.buffer_offset + map.offset;

         const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
         const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
@@ -141,7 +141,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s
     program_manager.RestoreGuestCompute();
 }

-void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
                               std::span<const SwizzleParameters> swizzles) {
     static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
     static constexpr GLuint BINDING_INPUT_BUFFER = 0;
@@ -159,7 +159,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
                "Non-power of two images are not implemented");

     program_manager.BindHostCompute(pitch_unswizzle_program.handle);
-    glFlushMappedNamedBufferRange(map.buffer, buffer_offset, image.guest_size_bytes);
+    glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
     glUniform2ui(LOC_ORIGIN, 0, 0);
     glUniform2i(LOC_DESTINATION, 0, 0);
     glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
@@ -167,7 +167,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu
     glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format);
     for (const SwizzleParameters& swizzle : swizzles) {
         const Extent3D num_tiles = swizzle.num_tiles;
-        const size_t input_offset = swizzle.buffer_offset + buffer_offset;
+        const size_t input_offset = swizzle.buffer_offset + map.offset;

         const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
         const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
index bec026bc3..7b1d16b09 100644
--- a/src/video_core/renderer_opengl/util_shaders.h
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -24,13 +24,13 @@ public:
     explicit UtilShaders(ProgramManager& program_manager);
     ~UtilShaders();

-    void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+    void BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
                              std::span<const VideoCommon::SwizzleParameters> swizzles);

-    void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+    void BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
                              std::span<const VideoCommon::SwizzleParameters> swizzles);

-    void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
+    void PitchUpload(Image& image, const ImageBufferMap& map,
                      std::span<const VideoCommon::SwizzleParameters> swizzles);

     void CopyBC4(Image& dst_image, Image& src_image,
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 48fc5d966..4f1e4ec28 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -138,17 +138,18 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
 void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format,
                                          u32 base_vertex, u32 num_indices, VkBuffer buffer,
                                          u32 offset, [[maybe_unused]] u32 size) {
-    VkIndexType index_type = MaxwellToVK::IndexFormat(index_format);
+    VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format);
+    VkDeviceSize vk_offset = offset;
     if (topology == PrimitiveTopology::Quads) {
-        index_type = VK_INDEX_TYPE_UINT32;
-        std::tie(buffer, offset) =
+        vk_index_type = VK_INDEX_TYPE_UINT32;
+        std::tie(buffer, vk_offset) =
             quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset);
-    } else if (index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
-        index_type = VK_INDEX_TYPE_UINT16;
-        std::tie(buffer, offset) = uint8_pass.Assemble(num_indices, buffer, offset);
+    } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) {
+        vk_index_type = VK_INDEX_TYPE_UINT16;
+        std::tie(buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset);
     }
-    scheduler.Record([buffer, offset, index_type](vk::CommandBuffer cmdbuf) {
-        cmdbuf.BindIndexBuffer(buffer, offset, index_type);
+    scheduler.Record([buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) {
+        cmdbuf.BindIndexBuffer(buffer, vk_offset, vk_index_type);
     });
 }

@@ -251,10 +252,10 @@ void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle
         }
     }
     scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([src_buffer = staging.buffer, dst_buffer = *quad_array_lut,
-                      size_bytes](vk::CommandBuffer cmdbuf) {
+    scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
+                      dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) {
         const VkBufferCopy copy{
-            .srcOffset = 0,
+            .srcOffset = src_offset,
            .dstOffset = 0,
            .size = size_bytes,
        };
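Note on the vk_buffer_cache.cpp hunks above: the index-assembly passes now return the staging buffer together with the offset of the assembled data, and BindIndexBuffer keeps that offset in a separate VkDeviceSize vk_offset instead of reusing the 32-bit guest offset. A tiny stand-in sketch of that flow follows; the types are placeholders, not the Vulkan or yuzu ones.

// Sketch: receive a {buffer, byte offset} pair from an assembly pass and bind at it.
#include <cstdint>
#include <tuple>
#include <utility>

using Buffer = int;          // stand-in for VkBuffer
using DeviceSize = uint64_t; // stand-in for VkDeviceSize

std::pair<Buffer, DeviceSize> AssembleQuads(Buffer /*src*/, uint32_t /*src_offset*/) {
    Buffer staging = 42;             // suballocated from the stream buffer
    DeviceSize staging_offset = 65536;
    return {staging, staging_offset};
}

void BindIndexBuffer(Buffer buffer, DeviceSize offset) {
    // stand-in for cmdbuf.BindIndexBuffer(buffer, offset, index_type)
    (void)buffer;
    (void)offset;
}

int main() {
    Buffer buffer = 1;
    DeviceSize bind_offset = 0; // the guest offset would go here when no pass runs
    std::tie(buffer, bind_offset) = AssembleQuads(buffer, 0);
    BindIndexBuffer(buffer, bind_offset);
}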
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index a4fdcdf81..2f9a7b028 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -10,6 +10,7 @@
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/common_types.h"
+#include "common/div_ceil.h"
 #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
 #include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
 #include "video_core/renderer_vulkan/vk_compute_pass.h"
@@ -148,38 +149,33 @@ Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,

 Uint8Pass::~Uint8Pass() = default;

-std::pair<VkBuffer, u32> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
-                                             u32 src_offset) {
+std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
+                                                      u32 src_offset) {
     const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
     const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);

     update_descriptor_queue.Acquire();
     update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
-    update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size);
+    update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
     const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);

     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
                       num_vertices](vk::CommandBuffer cmdbuf) {
-        constexpr u32 dispatch_size = 1024;
+        static constexpr u32 DISPATCH_SIZE = 1024;
+        static constexpr VkMemoryBarrier WRITE_BARRIER{
+            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
+        };
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
-        cmdbuf.Dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1);
-
-        VkBufferMemoryBarrier barrier;
-        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
-        barrier.pNext = nullptr;
-        barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
-        barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
-        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.buffer = buffer;
-        barrier.offset = 0;
-        barrier.size = static_cast<VkDeviceSize>(num_vertices * sizeof(u16));
+        cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1);
         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
-                               VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
+                               VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
     });
-    return {staging.buffer, 0};
+    return {staging.buffer, staging.offset};
 }

 QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
@@ -194,7 +190,7 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,

 QuadIndexedPass::~QuadIndexedPass() = default;

-std::pair<VkBuffer, u32> QuadIndexedPass::Assemble(
+std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
     Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
     VkBuffer src_buffer, u32 src_offset) {
     const u32 index_shift = [index_format] {
@@ -217,34 +213,29 @@ std::pair<VkBuffer, u32> QuadIndexedPass::Assemble(

     update_descriptor_queue.Acquire();
     update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
-    update_descriptor_queue.AddBuffer(staging.buffer, 0, staging_size);
+    update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
     const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);

     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set,
                       num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
-        static constexpr u32 dispatch_size = 1024;
+        static constexpr u32 DISPATCH_SIZE = 1024;
+        static constexpr VkMemoryBarrier WRITE_BARRIER{
+            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
+        };
         const std::array push_constants = {base_vertex, index_shift};
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
         cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
                              &push_constants);
-        cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1);
-
-        VkBufferMemoryBarrier barrier;
-        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
-        barrier.pNext = nullptr;
-        barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
-        barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
-        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-        barrier.buffer = buffer;
-        barrier.offset = 0;
-        barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32));
+        cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1);
         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
-                               VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
+                               VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
     });
-    return {staging.buffer, 0};
+    return {staging.buffer, staging.offset};
 }

 } // namespace Vulkan
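Note on the vk_compute_pass.cpp hunks above: the dispatch count switches from Common::AlignUp(n, size) / size to Common::DivCeil(n, size); both round the workgroup count up. The helper below is a minimal ceiling-division sketch written as an assumption of how such a utility is commonly implemented, not a copy of yuzu's common/div_ceil.h.

// Sketch of a ceiling division helper for computing dispatch/workgroup counts.
#include <cassert>
#include <cstdint>

// Smallest q such that q * divisor >= dividend (assumes dividend + divisor does not overflow).
template <typename N, typename D>
constexpr N DivCeil(N dividend, D divisor) {
    return static_cast<N>((dividend + divisor - 1) / divisor);
}

int main() {
    // 1500 vertices with 1024 threads per workgroup -> 2 dispatch groups.
    static_assert(DivCeil(1500u, 1024u) == 2);
    static_assert(DivCeil(1024u, 1024u) == 1);
    assert(DivCeil(uint32_t{0}, 1024u) == 0);
}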
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 4904019f5..17d781d99 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -50,7 +50,8 @@ public:

     /// Assemble uint8 indices into an uint16 index buffer
     /// Returns a pair with the staging buffer, and the offset where the assembled data is
-    std::pair<VkBuffer, u32> Assemble(u32 num_vertices, VkBuffer src_buffer, u32 src_offset);
+    std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, VkBuffer src_buffer,
+                                               u32 src_offset);

 private:
     VKScheduler& scheduler;
@@ -66,9 +67,9 @@ public:
                              VKUpdateDescriptorQueue& update_descriptor_queue_);
     ~QuadIndexedPass();

-    std::pair<VkBuffer, u32> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format,
-                                      u32 num_vertices, u32 base_vertex, VkBuffer src_buffer,
-                                      u32 src_offset);
+    std::pair<VkBuffer, VkDeviceSize> Assemble(
+        Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices,
+        u32 base_vertex, VkBuffer src_buffer, u32 src_offset);

 private:
     VKScheduler& scheduler;
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 97fd41cc1..275d740b8 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -8,6 +8,7 @@

 #include <fmt/format.h>

+#include "common/alignment.h"
 #include "common/assert.h"
 #include "common/bit_util.h"
 #include "common/common_types.h"
@@ -17,14 +18,117 @@
 #include "video_core/vulkan_common/vulkan_wrapper.h"

 namespace Vulkan {
+namespace {
+// Maximum potential alignment of a Vulkan buffer
+constexpr VkDeviceSize MAX_ALIGNMENT = 256;
+// Maximum size to put elements in the stream buffer
+constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8 * 1024 * 1024;
+// Stream buffer size in bytes
+constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
+constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
+
+constexpr VkMemoryPropertyFlags HOST_FLAGS =
+    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
+
+bool IsStreamHeap(VkMemoryHeap heap) noexcept {
+    return STREAM_BUFFER_SIZE < (heap.size * 2) / 3;
+}
+
+std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
+                                       VkMemoryPropertyFlags flags) noexcept {
+    for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
+        if (((type_mask >> type_index) & 1) == 0) {
+            // Memory type is incompatible
+            continue;
+        }
+        const VkMemoryType& memory_type = props.memoryTypes[type_index];
+        if ((memory_type.propertyFlags & flags) != flags) {
+            // Memory type doesn't have the flags we want
+            continue;
+        }
+        if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) {
+            // Memory heap is not suitable for streaming
+            continue;
+        }
+        // Success!
+        return type_index;
+    }
+    return std::nullopt;
+}
+
+u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask) {
+    // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this
+    std::optional<u32> type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
+    if (type) {
+        return *type;
+    }
+    // Otherwise try without the DEVICE_LOCAL_BIT
+    type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS);
+    if (type) {
+        return *type;
+    }
+    // This should never happen, and in case it does, signal it as an out of memory situation
+    throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
+}
+
+size_t Region(size_t iterator) noexcept {
+    return iterator / REGION_SIZE;
+}
+} // Anonymous namespace

 StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
                                      VKScheduler& scheduler_)
-    : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {}
+    : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
+    const vk::Device& dev = device.GetLogical();
+    stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{
+        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .size = STREAM_BUFFER_SIZE,
+        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .queueFamilyIndexCount = 0,
+        .pQueueFamilyIndices = nullptr,
+    });
+    if (device.HasDebuggingToolAttached()) {
+        stream_buffer.SetObjectNameEXT("Stream Buffer");
+    }
+    VkMemoryDedicatedRequirements dedicated_reqs{
+        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
+        .pNext = nullptr,
+        .prefersDedicatedAllocation = VK_FALSE,
+        .requiresDedicatedAllocation = VK_FALSE,
+    };
+    const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs);
+    const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE ||
                                dedicated_reqs.requiresDedicatedAllocation == VK_TRUE;
+    const VkMemoryDedicatedAllocateInfo dedicated_info{
+        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
+        .pNext = nullptr,
+        .image = nullptr,
+        .buffer = *stream_buffer,
+    };
+    const auto memory_properties = device.GetPhysical().GetMemoryProperties();
+    stream_memory = dev.AllocateMemory(VkMemoryAllocateInfo{
+        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+        .pNext = make_dedicated ? &dedicated_info : nullptr,
+        .allocationSize = requirements.size,
+        .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits),
+    });
+    if (device.HasDebuggingToolAttached()) {
+        stream_memory.SetObjectNameEXT("Stream Buffer Memory");
+    }
+    stream_buffer.BindMemory(*stream_memory, 0);
+    stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE);
+}

 StagingBufferPool::~StagingBufferPool() = default;

 StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) {
+    if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) {
+        return GetStreamBuffer(size);
+    }
     if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) {
         return *ref;
     }
@@ -39,6 +143,42 @@ void StagingBufferPool::TickFrame() {
     ReleaseCache(MemoryUsage::Download);
 }

+StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
+    for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
+         ++region) {
+        sync_ticks[region] = scheduler.CurrentTick();
+    }
+    used_iterator = iterator;
+
+    for (size_t region = Region(free_iterator) + 1,
+                region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
+         region < region_end; ++region) {
+        scheduler.Wait(sync_ticks[region]);
+    }
+    if (iterator + size > free_iterator) {
+        free_iterator = iterator + size;
+    }
+    if (iterator + size > STREAM_BUFFER_SIZE) {
+        for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
+            sync_ticks[region] = scheduler.CurrentTick();
+        }
+        used_iterator = 0;
+        iterator = 0;
+        free_iterator = size;
+
+        for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
+            scheduler.Wait(sync_ticks[region]);
+        }
+    }
+    const size_t offset = iterator;
+    iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
+    return StagingBufferRef{
+        .buffer = *stream_buffer,
+        .offset = static_cast<VkDeviceSize>(offset),
+        .mapped_span = std::span<u8>(stream_pointer + offset, size),
+    };
+}
+
 std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size,
                                                                         MemoryUsage usage) {
     StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)];
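Note on the new stream buffer above: the 128 MiB buffer is split into NUM_SYNCS (16) regions of REGION_SIZE = 8 MiB, which matches MAX_STREAM_BUFFER_REQUEST_SIZE, and GetStreamBuffer waits on the scheduler tick recorded for a region before handing that region out again. The following simplified, self-contained model mirrors the bookkeeping with the scheduler stubbed out; it illustrates the ring-allocation idea and is not the real StagingBufferPool.

// Sketch of region-based ring allocation with per-region synchronization ticks.
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>

constexpr size_t NUM_SYNCS = 16;
constexpr size_t BUFFER_SIZE = 128 * 1024 * 1024;
constexpr size_t REGION_SIZE = BUFFER_SIZE / NUM_SYNCS;
constexpr size_t ALIGNMENT = 256;

constexpr size_t Region(size_t offset) { return offset / REGION_SIZE; }
constexpr size_t AlignUp(size_t value, size_t align) { return (value + align - 1) / align * align; }

struct StreamAllocator {
    size_t iterator = 0;       // next free byte
    size_t used_iterator = 0;  // start of data tagged with the current tick
    size_t free_iterator = 0;  // highest byte handed out so far
    uint64_t current_tick = 1; // stand-in for the scheduler's submission counter
    std::array<uint64_t, NUM_SYNCS> sync_ticks{};

    void Wait(uint64_t /*tick*/) { /* the real code blocks until the GPU reached the tick */ }

    size_t Allocate(size_t size) {
        // Tag every region touched since the previous allocation with the current tick.
        for (size_t r = Region(used_iterator); r < Region(iterator); ++r) {
            sync_ticks[r] = current_tick;
        }
        used_iterator = iterator;
        // Wait for the regions this request is about to overwrite.
        for (size_t r = Region(free_iterator) + 1,
                    end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
             r < end; ++r) {
            Wait(sync_ticks[r]);
        }
        free_iterator = std::max(free_iterator, iterator + size);
        if (iterator + size > BUFFER_SIZE) { // wrap around to the start of the buffer
            for (size_t r = Region(used_iterator); r < NUM_SYNCS; ++r) {
                sync_ticks[r] = current_tick;
            }
            used_iterator = 0;
            iterator = 0;
            free_iterator = size;
            for (size_t r = 0; r <= Region(size); ++r) {
                Wait(sync_ticks[r]);
            }
        }
        const size_t offset = iterator;
        iterator = AlignUp(iterator + size, ALIGNMENT);
        return offset;
    }
};

int main() {
    StreamAllocator alloc;
    std::printf("first:  %zu\n", alloc.Allocate(1000)); // 0
    std::printf("second: %zu\n", alloc.Allocate(1000)); // 1024 (aligned)
}

Tracking one tick per region, rather than one per allocation, keeps the amount of synchronization state fixed at NUM_SYNCS no matter how many small requests are packed into the buffer.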
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index d42918a47..4ed99c0df 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -19,11 +19,14 @@ class VKScheduler;

 struct StagingBufferRef {
     VkBuffer buffer;
+    VkDeviceSize offset;
     std::span<u8> mapped_span;
 };

 class StagingBufferPool {
 public:
+    static constexpr size_t NUM_SYNCS = 16;
+
     explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator,
                                VKScheduler& scheduler);
     ~StagingBufferPool();
@@ -33,6 +36,11 @@ public:
     void TickFrame();

 private:
+    struct StreamBufferCommit {
+        size_t upper_bound;
+        u64 tick;
+    };
+
     struct StagingBuffer {
         vk::Buffer buffer;
         MemoryCommit commit;
@@ -42,6 +50,7 @@ private:
         StagingBufferRef Ref() const noexcept {
             return {
                 .buffer = *buffer,
+                .offset = 0,
                 .mapped_span = mapped_span,
             };
         }
@@ -56,6 +65,8 @@ private:
     static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT;
     using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>;

+    StagingBufferRef GetStreamBuffer(size_t size);
+
     std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage);

     StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage);
@@ -70,6 +81,15 @@ private:
     MemoryAllocator& memory_allocator;
     VKScheduler& scheduler;

+    vk::Buffer stream_buffer;
+    vk::DeviceMemory stream_memory;
+    u8* stream_pointer = nullptr;
+
+    size_t iterator = 0;
+    size_t used_iterator = 0;
+    size_t free_iterator = 0;
+    std::array<u64, NUM_SYNCS> sync_ticks{};
+
     StagingBuffersCache device_local_cache;
     StagingBuffersCache upload_cache;
     StagingBuffersCache download_cache;
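Note on the new offset field in StagingBufferRef (and the matching offset in the OpenGL ImageBufferMap): mapped_span already points at the suballocation for CPU access, while offset locates the same bytes for GPU commands issued against the whole buffer, so dedicated pooled buffers simply report offset = 0. A small stand-in sketch of that invariant follows, using plain containers instead of Vulkan handles.

// Sketch: one ref, two views of the same bytes (CPU span vs. buffer + offset).
#include <cstdint>
#include <cstring>
#include <span>
#include <vector>

struct Ref {
    std::vector<uint8_t>* buffer; // stand-in for VkBuffer
    uint64_t offset;              // where the suballocation starts inside the buffer
    std::span<uint8_t> mapped_span;
};

Ref Suballocate(std::vector<uint8_t>& backing, uint64_t offset, size_t size) {
    return Ref{&backing, offset, std::span<uint8_t>(backing.data() + offset, size)};
}

int main() {
    std::vector<uint8_t> backing(1 << 20);
    Ref ref = Suballocate(backing, 4096, 16);

    // CPU side: write through the span, no offset arithmetic needed.
    std::memset(ref.mapped_span.data(), 0xAB, ref.mapped_span.size());

    // "GPU" side: a copy command would read from ref.buffer at ref.offset.
    return backing[4096] == 0xAB ? 0 : 1;
}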
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 1eeb45ca9..22a1014a9 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -818,11 +818,10 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
     }
 }

-void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
-                         std::span<const BufferImageCopy> copies) {
+void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
     // TODO: Move this to another API
     scheduler->RequestOutsideRenderPassOperationContext();
-    std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
+    std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
     const VkBuffer src_buffer = map.buffer;
     const VkImage vk_image = *image;
     const VkImageAspectFlags vk_aspect_mask = aspect_mask;
@@ -833,11 +832,11 @@ void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
     });
 }

-void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
+void Image::UploadMemory(const StagingBufferRef& map,
                          std::span<const VideoCommon::BufferCopy> copies) {
     // TODO: Move this to another API
     scheduler->RequestOutsideRenderPassOperationContext();
-    std::vector vk_copies = TransformBufferCopies(copies, buffer_offset);
+    std::vector vk_copies = TransformBufferCopies(copies, map.offset);
     const VkBuffer src_buffer = map.buffer;
     const VkBuffer dst_buffer = *buffer;
     scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
@@ -846,9 +845,8 @@ void Image::UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
     });
 }

-void Image::DownloadMemory(const StagingBufferRef& map, size_t buffer_offset,
-                           std::span<const BufferImageCopy> copies) {
-    std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
+void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
+    std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
     scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask,
                        vk_copies](vk::CommandBuffer cmdbuf) {
         const VkImageMemoryBarrier read_barrier{
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 4558c3297..b08c23459 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -82,7 +82,7 @@ struct TextureCacheRuntime {
         return false;
     }

-    void AccelerateImageUpload(Image&, const StagingBufferRef&, size_t,
+    void AccelerateImageUpload(Image&, const StagingBufferRef&,
                                std::span<const VideoCommon::SwizzleParameters>) {
         UNREACHABLE();
     }
@@ -100,13 +100,12 @@ public:
     explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
                    VAddr cpu_addr);

-    void UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
+    void UploadMemory(const StagingBufferRef& map,
                       std::span<const VideoCommon::BufferImageCopy> copies);

-    void UploadMemory(const StagingBufferRef& map, size_t buffer_offset,
-                      std::span<const VideoCommon::BufferCopy> copies);
+    void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies);

-    void DownloadMemory(const StagingBufferRef& map, size_t buffer_offset,
+    void DownloadMemory(const StagingBufferRef& map,
                         std::span<const VideoCommon::BufferImageCopy> copies);

     [[nodiscard]] VkImage Handle() const noexcept {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f336b705f..b1da69971 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -212,7 +212,7 @@ private: | |||
| 212 | 212 | ||
| 213 | /// Upload data from guest to an image | 213 | /// Upload data from guest to an image |
| 214 | template <typename StagingBuffer> | 214 | template <typename StagingBuffer> |
| 215 | void UploadImageContents(Image& image, StagingBuffer& staging_buffer, size_t buffer_offset); | 215 | void UploadImageContents(Image& image, StagingBuffer& staging_buffer); |
| 216 | 216 | ||
| 217 | /// Find or create an image view from a guest descriptor | 217 | /// Find or create an image view from a guest descriptor |
| 218 | [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); | 218 | [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); |
| @@ -592,7 +592,7 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | |||
| 592 | Image& image = slot_images[image_id]; | 592 | Image& image = slot_images[image_id]; |
| 593 | auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); | 593 | auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); |
| 594 | const auto copies = FullDownloadCopies(image.info); | 594 | const auto copies = FullDownloadCopies(image.info); |
| 595 | image.DownloadMemory(map, 0, copies); | 595 | image.DownloadMemory(map, copies); |
| 596 | runtime.Finish(); | 596 | runtime.Finish(); |
| 597 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); | 597 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); |
| 598 | } | 598 | } |
| @@ -750,24 +750,24 @@ void TextureCache<P>::PopAsyncFlushes() { | |||
| 750 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | 750 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; |
| 751 | } | 751 | } |
| 752 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); | 752 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); |
| 753 | size_t buffer_offset = 0; | 753 | const size_t original_offset = download_map.offset; |
| 754 | for (const ImageId image_id : download_ids) { | 754 | for (const ImageId image_id : download_ids) { |
| 755 | Image& image = slot_images[image_id]; | 755 | Image& image = slot_images[image_id]; |
| 756 | const auto copies = FullDownloadCopies(image.info); | 756 | const auto copies = FullDownloadCopies(image.info); |
| 757 | image.DownloadMemory(download_map, buffer_offset, copies); | 757 | image.DownloadMemory(download_map, copies); |
| 758 | buffer_offset += image.unswizzled_size_bytes; | 758 | download_map.offset += image.unswizzled_size_bytes; |
| 759 | } | 759 | } |
| 760 | // Wait for downloads to finish | 760 | // Wait for downloads to finish |
| 761 | runtime.Finish(); | 761 | runtime.Finish(); |
| 762 | 762 | ||
| 763 | buffer_offset = 0; | 763 | download_map.offset = original_offset; |
| 764 | const std::span<u8> download_span = download_map.mapped_span; | 764 | std::span<u8> download_span = download_map.mapped_span; |
| 765 | for (const ImageId image_id : download_ids) { | 765 | for (const ImageId image_id : download_ids) { |
| 766 | const ImageBase& image = slot_images[image_id]; | 766 | const ImageBase& image = slot_images[image_id]; |
| 767 | const auto copies = FullDownloadCopies(image.info); | 767 | const auto copies = FullDownloadCopies(image.info); |
| 768 | const std::span<u8> image_download_span = download_span.subspan(buffer_offset); | 768 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); |
| 769 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span); | 769 | download_map.offset += image.unswizzled_size_bytes; |
| 770 | buffer_offset += image.unswizzled_size_bytes; | 770 | download_span = download_span.subspan(image.unswizzled_size_bytes); |
| 771 | } | 771 | } |
| 772 | committed_downloads.pop(); | 772 | committed_downloads.pop(); |
| 773 | } | 773 | } |
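PopAsyncFlushes now reuses download_map.offset as a cursor: it advances it by each image's unswizzled size while recording the downloads, restores it to original_offset before reading back, and walks mapped_span forward by the same amounts. A small self-contained sketch of that bookkeeping, with illustrative names:

    #include <cstddef>
    #include <span>
    #include <vector>

    // Illustrative sketch: carve one staging allocation into per-image slices by
    // accumulating each image's size, mirroring the offset cursor used above.
    struct Slice {
        std::size_t offset;      // device-side offset of this image inside the staging buffer
        std::size_t size_bytes;  // bytes this image occupies
    };

    std::vector<Slice> PackDownloads(std::span<const std::size_t> image_sizes,
                                     std::size_t base_offset) {
        std::vector<Slice> slices;
        slices.reserve(image_sizes.size());
        std::size_t cursor = base_offset;
        for (const std::size_t size : image_sizes) {
            slices.push_back({cursor, size});
            cursor += size;
        }
        return slices;
    }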
| @@ -798,32 +798,32 @@ void TextureCache<P>::RefreshContents(Image& image) { | |||
| 798 | LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); | 798 | LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); |
| 799 | return; | 799 | return; |
| 800 | } | 800 | } |
| 801 | auto map = runtime.UploadStagingBuffer(MapSizeBytes(image)); | 801 | auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); |
| 802 | UploadImageContents(image, map, 0); | 802 | UploadImageContents(image, staging); |
| 803 | runtime.InsertUploadMemoryBarrier(); | 803 | runtime.InsertUploadMemoryBarrier(); |
| 804 | } | 804 | } |
| 805 | 805 | ||
| 806 | template <class P> | 806 | template <class P> |
| 807 | template <typename MapBuffer> | 807 | template <typename StagingBuffer> |
| 808 | void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) { | 808 | void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) { |
| 809 | const std::span<u8> mapped_span = map.mapped_span.subspan(buffer_offset); | 809 | const std::span<u8> mapped_span = staging.mapped_span; |
| 810 | const GPUVAddr gpu_addr = image.gpu_addr; | 810 | const GPUVAddr gpu_addr = image.gpu_addr; |
| 811 | 811 | ||
| 812 | if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { | 812 | if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { |
| 813 | gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); | 813 | gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); |
| 814 | const auto uploads = FullUploadSwizzles(image.info); | 814 | const auto uploads = FullUploadSwizzles(image.info); |
| 815 | runtime.AccelerateImageUpload(image, map, buffer_offset, uploads); | 815 | runtime.AccelerateImageUpload(image, staging, uploads); |
| 816 | } else if (True(image.flags & ImageFlagBits::Converted)) { | 816 | } else if (True(image.flags & ImageFlagBits::Converted)) { |
| 817 | std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); | 817 | std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); |
| 818 | auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); | 818 | auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); |
| 819 | ConvertImage(unswizzled_data, image.info, mapped_span, copies); | 819 | ConvertImage(unswizzled_data, image.info, mapped_span, copies); |
| 820 | image.UploadMemory(map, buffer_offset, copies); | 820 | image.UploadMemory(staging, copies); |
| 821 | } else if (image.info.type == ImageType::Buffer) { | 821 | } else if (image.info.type == ImageType::Buffer) { |
| 822 | const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; | 822 | const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; |
| 823 | image.UploadMemory(map, buffer_offset, copies); | 823 | image.UploadMemory(staging, copies); |
| 824 | } else { | 824 | } else { |
| 825 | const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); | 825 | const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); |
| 826 | image.UploadMemory(map, buffer_offset, copies); | 826 | image.UploadMemory(staging, copies); |
| 827 | } | 827 | } |
| 828 | } | 828 | } |
| 829 | 829 | ||
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index d39bbdc70..2aa0ffbe6 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp | |||
| @@ -168,7 +168,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 168 | X(vkFreeCommandBuffers); | 168 | X(vkFreeCommandBuffers); |
| 169 | X(vkFreeDescriptorSets); | 169 | X(vkFreeDescriptorSets); |
| 170 | X(vkFreeMemory); | 170 | X(vkFreeMemory); |
| 171 | X(vkGetBufferMemoryRequirements); | 171 | X(vkGetBufferMemoryRequirements2); |
| 172 | X(vkGetDeviceQueue); | 172 | X(vkGetDeviceQueue); |
| 173 | X(vkGetEventStatus); | 173 | X(vkGetEventStatus); |
| 174 | X(vkGetFenceStatus); | 174 | X(vkGetFenceStatus); |
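Loading the function under its core name assumes a Vulkan 1.1+ device, where vkGetBufferMemoryRequirements2 is part of the core API. On a 1.0 device the same entry point only exists as vkGetBufferMemoryRequirements2KHR via VK_KHR_get_memory_requirements2; a hedged sketch of such a fallback, which is not what this loader does:

    #include <vulkan/vulkan.h>

    // Illustrative fallback only: try the core name first, then the KHR alias.
    PFN_vkGetBufferMemoryRequirements2 LoadGetBufferMemoryRequirements2(VkDevice device) {
        PFN_vkGetBufferMemoryRequirements2 fn = reinterpret_cast<PFN_vkGetBufferMemoryRequirements2>(
            vkGetDeviceProcAddr(device, "vkGetBufferMemoryRequirements2"));
        if (fn == nullptr) {
            fn = reinterpret_cast<PFN_vkGetBufferMemoryRequirements2>(
                vkGetDeviceProcAddr(device, "vkGetBufferMemoryRequirements2KHR"));
        }
        return fn;
    }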
| @@ -786,10 +786,20 @@ DeviceMemory Device::AllocateMemory(const VkMemoryAllocateInfo& ai) const { | |||
| 786 | return DeviceMemory(memory, handle, *dld); | 786 | return DeviceMemory(memory, handle, *dld); |
| 787 | } | 787 | } |
| 788 | 788 | ||
| 789 | VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer) const noexcept { | 789 | VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer, |
| 790 | VkMemoryRequirements requirements; | 790 | void* pnext) const noexcept { |
| 791 | dld->vkGetBufferMemoryRequirements(handle, buffer, &requirements); | 791 | const VkBufferMemoryRequirementsInfo2 info{ |
| 792 | return requirements; | 792 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, |
| 793 | .pNext = nullptr, | ||
| 794 | .buffer = buffer, | ||
| 795 | }; | ||
| 796 | VkMemoryRequirements2 requirements{ | ||
| 797 | .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, | ||
| 798 | .pNext = pnext, | ||
| 799 | .memoryRequirements{}, | ||
| 800 | }; | ||
| 801 | dld->vkGetBufferMemoryRequirements2(handle, &info, &requirements); | ||
| 802 | return requirements.memoryRequirements; | ||
| 793 | } | 803 | } |
| 794 | 804 | ||
| 795 | VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept { | 805 | VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept { |
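The point of routing the query through vkGetBufferMemoryRequirements2 is the new pnext hook: callers can chain an extension output structure into VkMemoryRequirements2 and have it filled by the same call. A hedged usage sketch asking for dedicated-allocation hints; the namespace qualification and the helper itself are assumptions, only the wrapper call matches the new signature:

    #include <vulkan/vulkan.h>
    #include "video_core/vulkan_common/vulkan_wrapper.h"

    // Illustrative call site: chain VkMemoryDedicatedRequirements through pnext
    // so one query also reports whether the buffer prefers its own allocation.
    bool PrefersDedicatedAllocation(const Vulkan::vk::Device& device, VkBuffer buffer) {
        VkMemoryDedicatedRequirements dedicated{
            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
            .pNext = nullptr,
        };
        // The plain requirements are still returned; the chained struct is filled in place.
        [[maybe_unused]] const VkMemoryRequirements requirements =
            device.GetBufferMemoryRequirements(buffer, &dedicated);
        return dedicated.prefersDedicatedAllocation == VK_TRUE;
    }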
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 7f781b081..3e36d356a 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h | |||
| @@ -283,7 +283,7 @@ struct DeviceDispatch : InstanceDispatch { | |||
| 283 | PFN_vkFreeCommandBuffers vkFreeCommandBuffers{}; | 283 | PFN_vkFreeCommandBuffers vkFreeCommandBuffers{}; |
| 284 | PFN_vkFreeDescriptorSets vkFreeDescriptorSets{}; | 284 | PFN_vkFreeDescriptorSets vkFreeDescriptorSets{}; |
| 285 | PFN_vkFreeMemory vkFreeMemory{}; | 285 | PFN_vkFreeMemory vkFreeMemory{}; |
| 286 | PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements{}; | 286 | PFN_vkGetBufferMemoryRequirements2 vkGetBufferMemoryRequirements2{}; |
| 287 | PFN_vkGetDeviceQueue vkGetDeviceQueue{}; | 287 | PFN_vkGetDeviceQueue vkGetDeviceQueue{}; |
| 288 | PFN_vkGetEventStatus vkGetEventStatus{}; | 288 | PFN_vkGetEventStatus vkGetEventStatus{}; |
| 289 | PFN_vkGetFenceStatus vkGetFenceStatus{}; | 289 | PFN_vkGetFenceStatus vkGetFenceStatus{}; |
| @@ -871,7 +871,8 @@ public: | |||
| 871 | 871 | ||
| 872 | DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const; | 872 | DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const; |
| 873 | 873 | ||
| 874 | VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer) const noexcept; | 874 | VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer, |
| 875 | void* pnext = nullptr) const noexcept; | ||
| 875 | 876 | ||
| 876 | VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept; | 877 | VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept; |
| 877 | 878 | ||