summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar bunnei2023-02-12 00:18:52 -0800
committerGravatar GitHub2023-02-12 00:18:52 -0800
commitf70fcdb873f768403af37622fbc195433b3605ef (patch)
treeb1fbe289c78b39bde41e7a895d8e172d4f3a1927 /src
parentMerge pull request #9781 from ColinKinloch/info_id_typo (diff)
parenttexture_cache: OpenGL: Implement MSAA uploads and copies (diff)
downloadyuzu-f70fcdb873f768403af37622fbc195433b3605ef.tar.gz
yuzu-f70fcdb873f768403af37622fbc195433b3605ef.tar.xz
yuzu-f70fcdb873f768403af37622fbc195433b3605ef.zip
Merge pull request #9746 from ameerj/ogl-msaa-texcache
texture_cache: OpenGL: Implement MSAA uploads and copies
Diffstat (limited to 'src')
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt2
-rw-r--r--src/video_core/host_shaders/convert_msaa_to_non_msaa.comp30
-rw-r--r--src/video_core/host_shaders/convert_non_msaa_to_msaa.comp29
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp8
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h9
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp33
-rw-r--r--src/video_core/renderer_opengl/util_shaders.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp5
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h7
-rw-r--r--src/video_core/texture_cache/formatter.cpp3
-rw-r--r--src/video_core/texture_cache/texture_cache.h14
-rw-r--r--src/video_core/texture_cache/util.cpp5
12 files changed, 136 insertions, 14 deletions
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 52cd5bb81..2442c3c29 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -22,6 +22,8 @@ set(SHADER_FILES
22 convert_d24s8_to_abgr8.frag 22 convert_d24s8_to_abgr8.frag
23 convert_depth_to_float.frag 23 convert_depth_to_float.frag
24 convert_float_to_depth.frag 24 convert_float_to_depth.frag
25 convert_msaa_to_non_msaa.comp
26 convert_non_msaa_to_msaa.comp
25 convert_s8d24_to_abgr8.frag 27 convert_s8d24_to_abgr8.frag
26 full_screen_triangle.vert 28 full_screen_triangle.vert
27 fxaa.frag 29 fxaa.frag
diff --git a/src/video_core/host_shaders/convert_msaa_to_non_msaa.comp b/src/video_core/host_shaders/convert_msaa_to_non_msaa.comp
new file mode 100644
index 000000000..fc3854d18
--- /dev/null
+++ b/src/video_core/host_shaders/convert_msaa_to_non_msaa.comp
@@ -0,0 +1,30 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#version 450 core
5layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
6
7layout (binding = 0, rgba8) uniform readonly restrict image2DMSArray msaa_in;
8layout (binding = 1, rgba8) uniform writeonly restrict image2DArray output_img;
9
// Expands a multisampled array image into a single-sampled one.
// Each invocation handles one multisampled texel (x, y, layer) and scatters its
// samples into a 2x2 footprint of the output image: bit 0 of the sample index
// selects the column and bit 1 selects the row inside that footprint.
void main() {
    const ivec3 texel = ivec3(gl_GlobalInvocationID);
    // The dispatch is rounded up to whole 8x8 workgroups; drop the overhang.
    if (any(greaterThanEqual(texel, imageSize(msaa_in)))) {
        return;
    }
    const ivec3 output_size = imageSize(output_img);

    // TODO: Specialization constants for num_samples?
    const int sample_count = imageSamples(msaa_in);
    for (int sample_index = 0; sample_index < sample_count; ++sample_index) {
        const ivec2 footprint_offset = ivec2(sample_index & 1, (sample_index / 2) & 1);
        const ivec3 dest_coords = ivec3(2 * texel.xy + footprint_offset, texel.z);

        // Only write destinations that actually exist in the output image.
        if (all(lessThan(dest_coords, output_size))) {
            imageStore(output_img, dest_coords, imageLoad(msaa_in, texel, sample_index));
        }
    }
}
diff --git a/src/video_core/host_shaders/convert_non_msaa_to_msaa.comp b/src/video_core/host_shaders/convert_non_msaa_to_msaa.comp
new file mode 100644
index 000000000..dedd962f1
--- /dev/null
+++ b/src/video_core/host_shaders/convert_non_msaa_to_msaa.comp
@@ -0,0 +1,29 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#version 450 core
5layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
6
7layout (binding = 0, rgba8) uniform readonly restrict image2DArray img_in;
8layout (binding = 1, rgba8) uniform writeonly restrict image2DMSArray output_msaa;
9
// Packs a single-sampled array image into a multisampled one.
// Each invocation handles one multisampled texel (x, y, layer) and gathers its
// samples from a 2x2 footprint of the input image: bit 0 of the sample index
// selects the column and bit 1 selects the row inside that footprint.
void main() {
    const ivec3 texel = ivec3(gl_GlobalInvocationID);
    // The dispatch is rounded up to whole 8x8 workgroups; drop the overhang.
    if (any(greaterThanEqual(texel, imageSize(output_msaa)))) {
        return;
    }
    const ivec3 input_size = imageSize(img_in);

    // TODO: Specialization constants for num_samples?
    const int sample_count = imageSamples(output_msaa);
    for (int sample_index = 0; sample_index < sample_count; ++sample_index) {
        const ivec2 footprint_offset = ivec2(sample_index & 1, (sample_index / 2) & 1);
        const ivec3 source_coords = ivec3(2 * texel.xy + footprint_offset, texel.z);

        // Samples whose source texel lies outside the input image are left untouched.
        if (all(lessThan(source_coords, input_size))) {
            imageStore(output_msaa, texel, sample_index, imageLoad(img_in, source_coords));
        }
    }
}
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 9f7ce7414..eb6e43a08 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -557,6 +557,14 @@ void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image,
557 } 557 }
558} 558}
559 559
// Copies between two images whose sample counts differ.
// Logs both sample counts at debug level, then forwards the images and the copy
// list unchanged to UtilShaders::CopyMSAA, which performs the actual work.
560void TextureCacheRuntime::CopyImageMSAA(Image& dst_image, Image& src_image,
561 std::span<const VideoCommon::ImageCopy> copies) {
562 LOG_DEBUG(Render_OpenGL, "Copying from {} samples to {} samples", src_image.info.num_samples,
563 dst_image.info.num_samples);
564 // TODO: Leverage the format conversion pass if possible/accurate.
565 util_shaders.CopyMSAA(dst_image, src_image, copies);
566}
567
560void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, 568void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
561 std::span<const VideoCommon::ImageCopy> copies) { 569 std::span<const VideoCommon::ImageCopy> copies) {
562 LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format); 570 LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format);
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 5d9d370f2..e30875496 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -93,12 +93,19 @@ public:
93 return device.CanReportMemoryUsage(); 93 return device.CanReportMemoryUsage();
94 } 94 }
95 95
96 bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) { 96 bool ShouldReinterpret([[maybe_unused]] Image& dst,
97 [[maybe_unused]] Image& src) const noexcept {
98 return true;
99 }
100
101 bool CanUploadMSAA() const noexcept {
97 return true; 102 return true;
98 } 103 }
99 104
100 void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); 105 void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
101 106
107 void CopyImageMSAA(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
108
102 void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); 109 void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
103 110
104 void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { 111 void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 404def62e..2c7ac210b 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -12,6 +12,8 @@
12#include "video_core/host_shaders/astc_decoder_comp.h" 12#include "video_core/host_shaders/astc_decoder_comp.h"
13#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" 13#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
14#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" 14#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
15#include "video_core/host_shaders/convert_msaa_to_non_msaa_comp.h"
16#include "video_core/host_shaders/convert_non_msaa_to_msaa_comp.h"
15#include "video_core/host_shaders/opengl_convert_s8d24_comp.h" 17#include "video_core/host_shaders/opengl_convert_s8d24_comp.h"
16#include "video_core/host_shaders/opengl_copy_bc4_comp.h" 18#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
17#include "video_core/host_shaders/pitch_unswizzle_comp.h" 19#include "video_core/host_shaders/pitch_unswizzle_comp.h"
@@ -51,7 +53,9 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
51 block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), 53 block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
52 pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), 54 pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
53 copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)), 55 copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)),
54 convert_s8d24_program(MakeProgram(OPENGL_CONVERT_S8D24_COMP)) { 56 convert_s8d24_program(MakeProgram(OPENGL_CONVERT_S8D24_COMP)),
57 convert_ms_to_nonms_program(MakeProgram(CONVERT_MSAA_TO_NON_MSAA_COMP)),
58 convert_nonms_to_ms_program(MakeProgram(CONVERT_NON_MSAA_TO_MSAA_COMP)) {
55 const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); 59 const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
56 swizzle_table_buffer.Create(); 60 swizzle_table_buffer.Create();
57 glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); 61 glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
@@ -269,6 +273,33 @@ void UtilShaders::ConvertS8D24(Image& dst_image, std::span<const ImageCopy> copi
269 program_manager.RestoreGuestCompute(); 273 program_manager.RestoreGuestCompute();
270} 274}
271 275
276void UtilShaders::CopyMSAA(Image& dst_image, Image& src_image,
277 std::span<const VideoCommon::ImageCopy> copies) {
278 const bool is_ms_to_non_ms = src_image.info.num_samples > 1 && dst_image.info.num_samples == 1;
279 const auto program_handle =
280 is_ms_to_non_ms ? convert_ms_to_nonms_program.handle : convert_nonms_to_ms_program.handle;
281 program_manager.BindComputeProgram(program_handle);
282
283 for (const ImageCopy& copy : copies) {
284 ASSERT(copy.src_subresource.base_layer == 0);
285 ASSERT(copy.src_subresource.num_layers == 1);
286 ASSERT(copy.dst_subresource.base_layer == 0);
287 ASSERT(copy.dst_subresource.num_layers == 1);
288
289 glBindImageTexture(0, src_image.StorageHandle(), copy.src_subresource.base_level, GL_TRUE,
290 0, GL_READ_ONLY, GL_RGBA8);
291 glBindImageTexture(1, dst_image.StorageHandle(), copy.dst_subresource.base_level, GL_TRUE,
292 0, GL_WRITE_ONLY, GL_RGBA8);
293
294 const u32 num_dispatches_x = Common::DivCeil(copy.extent.width, 8U);
295 const u32 num_dispatches_y = Common::DivCeil(copy.extent.height, 8U);
296 const u32 num_dispatches_z = copy.extent.depth;
297
298 glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
299 }
300 program_manager.RestoreGuestCompute();
301}
302
272GLenum StoreFormat(u32 bytes_per_block) { 303GLenum StoreFormat(u32 bytes_per_block) {
273 switch (bytes_per_block) { 304 switch (bytes_per_block) {
274 case 1: 305 case 1:
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
index 44efb6ecf..9013808e7 100644
--- a/src/video_core/renderer_opengl/util_shaders.h
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -40,6 +40,9 @@ public:
40 40
41 void ConvertS8D24(Image& dst_image, std::span<const VideoCommon::ImageCopy> copies); 41 void ConvertS8D24(Image& dst_image, std::span<const VideoCommon::ImageCopy> copies);
42 42
43 void CopyMSAA(Image& dst_image, Image& src_image,
44 std::span<const VideoCommon::ImageCopy> copies);
45
43private: 46private:
44 ProgramManager& program_manager; 47 ProgramManager& program_manager;
45 48
@@ -51,6 +54,8 @@ private:
51 OGLProgram pitch_unswizzle_program; 54 OGLProgram pitch_unswizzle_program;
52 OGLProgram copy_bc4_program; 55 OGLProgram copy_bc4_program;
53 OGLProgram convert_s8d24_program; 56 OGLProgram convert_s8d24_program;
57 OGLProgram convert_ms_to_nonms_program;
58 OGLProgram convert_nonms_to_ms_program;
54}; 59};
55 60
56GLenum StoreFormat(u32 bytes_per_block); 61GLenum StoreFormat(u32 bytes_per_block);
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index d39372ec4..9b85dfb5e 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1230,6 +1230,11 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
1230 }); 1230 });
1231} 1231}
1232 1232
// Vulkan entry point for copies between images with different sample counts.
// Not implemented: the body only raises UNIMPLEMENTED_MSG and copies nothing, so
// dst, src and copies are all unused.
1233void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src,
1234 std::span<const VideoCommon::ImageCopy> copies) {
1235 UNIMPLEMENTED_MSG("Copying images with different samples is not implemented in Vulkan.");
1236}
1237
1233u64 TextureCacheRuntime::GetDeviceLocalMemory() const { 1238u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
1234 return device.GetDeviceLocalMemory(); 1239 return device.GetDeviceLocalMemory();
1235} 1240}
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 1f27a3589..b9ee83de7 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -70,6 +70,8 @@ public:
70 70
71 void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); 71 void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
72 72
73 void CopyImageMSAA(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
74
73 bool ShouldReinterpret(Image& dst, Image& src); 75 bool ShouldReinterpret(Image& dst, Image& src);
74 76
75 void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); 77 void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
@@ -80,6 +82,11 @@ public:
80 return false; 82 return false;
81 } 83 }
82 84
/// Capability query for uploads into multisampled images.
/// Always returns false for this runtime until the TODO below is addressed.
85 bool CanUploadMSAA() const noexcept {
86 // TODO: Implement buffer to MSAA uploads
87 return false;
88 }
89
83 void AccelerateImageUpload(Image&, const StagingBufferRef&, 90 void AccelerateImageUpload(Image&, const StagingBufferRef&,
84 std::span<const VideoCommon::SwizzleParameters>); 91 std::span<const VideoCommon::SwizzleParameters>);
85 92
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp
index 418890126..30f72361d 100644
--- a/src/video_core/texture_cache/formatter.cpp
+++ b/src/video_core/texture_cache/formatter.cpp
@@ -22,6 +22,9 @@ std::string Name(const ImageBase& image) {
22 const u32 num_layers = image.info.resources.layers; 22 const u32 num_layers = image.info.resources.layers;
23 const u32 num_levels = image.info.resources.levels; 23 const u32 num_levels = image.info.resources.levels;
24 std::string resource; 24 std::string resource;
25 if (image.info.num_samples > 1) {
26 resource += fmt::format(":{}xMSAA", image.info.num_samples);
27 }
25 if (num_layers > 1) { 28 if (num_layers > 1) {
26 resource += fmt::format(":L{}", num_layers); 29 resource += fmt::format(":L{}", num_layers);
27 } 30 }
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 1b01990a4..3e2cbb0b0 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -773,7 +773,7 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
773 image.flags &= ~ImageFlagBits::CpuModified; 773 image.flags &= ~ImageFlagBits::CpuModified;
774 TrackImage(image, image_id); 774 TrackImage(image, image_id);
775 775
776 if (image.info.num_samples > 1) { 776 if (image.info.num_samples > 1 && !runtime.CanUploadMSAA()) {
777 LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); 777 LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
778 return; 778 return;
779 } 779 }
@@ -1167,14 +1167,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1167 if (True(overlap.flags & ImageFlagBits::GpuModified)) { 1167 if (True(overlap.flags & ImageFlagBits::GpuModified)) {
1168 new_image.flags |= ImageFlagBits::GpuModified; 1168 new_image.flags |= ImageFlagBits::GpuModified;
1169 } 1169 }
1170 const auto& resolution = Settings::values.resolution_info;
1171 const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
1172 const u32 up_scale = can_rescale ? resolution.up_scale : 1;
1173 const u32 down_shift = can_rescale ? resolution.down_shift : 0;
1174 auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);
1170 if (overlap.info.num_samples != new_image.info.num_samples) { 1175 if (overlap.info.num_samples != new_image.info.num_samples) {
1171 LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); 1176 runtime.CopyImageMSAA(new_image, overlap, std::move(copies));
1172 } else { 1177 } else {
1173 const auto& resolution = Settings::values.resolution_info;
1174 const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
1175 const u32 up_scale = can_rescale ? resolution.up_scale : 1;
1176 const u32 down_shift = can_rescale ? resolution.down_shift : 0;
1177 auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift);
1178 runtime.CopyImage(new_image, overlap, std::move(copies)); 1178 runtime.CopyImage(new_image, overlap, std::move(copies));
1179 } 1179 }
1180 if (True(overlap.flags & ImageFlagBits::Tracked)) { 1180 if (True(overlap.flags & ImageFlagBits::Tracked)) {
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 03acc68d9..697f86641 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -573,10 +573,6 @@ u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept {
573 if (info.type == ImageType::Buffer) { 573 if (info.type == ImageType::Buffer) {
574 return info.size.width * BytesPerBlock(info.format); 574 return info.size.width * BytesPerBlock(info.format);
575 } 575 }
576 if (info.num_samples > 1) {
577 // Multisample images can't be uploaded or downloaded to the host
578 return 0;
579 }
580 if (info.type == ImageType::Linear) { 576 if (info.type == ImageType::Linear) {
581 return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)); 577 return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format));
582 } 578 }
@@ -703,7 +699,6 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept {
703std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, 699std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src,
704 SubresourceBase base, u32 up_scale, u32 down_shift) { 700 SubresourceBase base, u32 up_scale, u32 down_shift) {
705 ASSERT(dst.resources.levels >= src.resources.levels); 701 ASSERT(dst.resources.levels >= src.resources.levels);
706 ASSERT(dst.num_samples == src.num_samples);
707 702
708 const bool is_dst_3d = dst.type == ImageType::e3D; 703 const bool is_dst_3d = dst.type == ImageType::e3D;
709 if (is_dst_3d) { 704 if (is_dst_3d) {