author ReinUsesLisp 2020-12-30 02:25:23 -0300
committer ReinUsesLisp 2020-12-30 03:38:50 -0300
commit 9764c13d6d2977903f407761b27d847c0056e1c4 (patch)
tree f6f5d6d6379b0404147969e7d1f548ed3d49ca01
parent video_core: Add a delayed destruction ring abstraction (diff)
download yuzu-9764c13d6d2977903f407761b27d847c0056e1c4.tar.gz
download yuzu-9764c13d6d2977903f407761b27d847c0056e1c4.tar.xz
download yuzu-9764c13d6d2977903f407761b27d847c0056e1c4.zip
video_core: Rewrite the texture cache
The current texture cache has several points that hurt maintainability and performance. It's easy to break unrelated parts of the cache when doing minor changes. The cache can easily forget valuable information about the cached textures by CPU writes or simply by its normal usage. This commit aims to address those issues.
-rw-r--r-- src/video_core/CMakeLists.txt | 47
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h | 19
-rw-r--r-- src/video_core/command_classes/vic.cpp | 8
-rw-r--r-- src/video_core/compatible_formats.cpp | 142
-rw-r--r-- src/video_core/compatible_formats.h | 23
-rw-r--r-- src/video_core/dirty_flags.cpp | 7
-rw-r--r-- src/video_core/dirty_flags.h | 3
-rw-r--r-- src/video_core/engines/fermi_2d.cpp | 89
-rw-r--r-- src/video_core/engines/fermi_2d.h | 331
-rw-r--r-- src/video_core/engines/kepler_compute.cpp | 26
-rw-r--r-- src/video_core/engines/kepler_compute.h | 5
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp | 45
-rw-r--r-- src/video_core/engines/maxwell_3d.h | 127
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp | 3
-rw-r--r-- src/video_core/fence_manager.h | 17
-rw-r--r-- src/video_core/memory_manager.cpp | 5
-rw-r--r-- src/video_core/morton.cpp | 250
-rw-r--r-- src/video_core/morton.h | 18
-rw-r--r-- src/video_core/rasterizer_interface.h | 12
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 7
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.h | 8
-rw-r--r-- src/video_core/renderer_opengl/gl_device.cpp | 64
-rw-r--r-- src/video_core/renderer_opengl/gl_device.h | 13
-rw-r--r-- src/video_core/renderer_opengl/gl_fence_manager.cpp | 2
-rw-r--r-- src/video_core/renderer_opengl/gl_fence_manager.h | 4
-rw-r--r-- src/video_core/renderer_opengl/gl_framebuffer_cache.cpp | 85
-rw-r--r-- src/video_core/renderer_opengl/gl_framebuffer_cache.h | 68
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 504
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h | 63
-rw-r--r-- src/video_core/renderer_opengl/gl_resource_manager.cpp | 2
-rw-r--r-- src/video_core/renderer_opengl/gl_sampler_cache.cpp | 52
-rw-r--r-- src/video_core/renderer_opengl/gl_sampler_cache.h | 25
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 1
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 8
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.h | 4
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.cpp | 15
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.h | 6
-rw-r--r-- src/video_core/renderer_opengl/gl_state_tracker.cpp | 7
-rw-r--r-- src/video_core/renderer_opengl/gl_state_tracker.h | 15
-rw-r--r-- src/video_core/renderer_opengl/gl_stream_buffer.cpp | 32
-rw-r--r-- src/video_core/renderer_opengl/gl_stream_buffer.h | 19
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.cpp | 1454
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.h | 286
-rw-r--r-- src/video_core/renderer_opengl/maxwell_to_gl.h | 13
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp | 49
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.h | 1
-rw-r--r-- src/video_core/renderer_opengl/util_shaders.cpp | 224
-rw-r--r-- src/video_core/renderer_opengl/util_shaders.h | 51
-rw-r--r-- src/video_core/renderer_opengl/utils.cpp | 42
-rw-r--r-- src/video_core/renderer_opengl/utils.h | 16
-rw-r--r-- src/video_core/renderer_vulkan/blit_image.cpp | 624
-rw-r--r-- src/video_core/renderer_vulkan/blit_image.h | 97
-rw-r--r-- src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | 1
-rw-r--r-- src/video_core/renderer_vulkan/fixed_pipeline_state.h | 1
-rw-r--r-- src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 30
-rw-r--r-- src/video_core/renderer_vulkan/maxwell_to_vk.h | 2
-rw-r--r-- src/video_core/renderer_vulkan/renderer_vulkan.cpp | 13
-rw-r--r-- src/video_core/renderer_vulkan/renderer_vulkan.h | 3
-rw-r--r-- src/video_core/renderer_vulkan/shaders/blit.frag | 24
-rw-r--r-- src/video_core/renderer_vulkan/shaders/blit.vert | 28
-rw-r--r-- src/video_core/renderer_vulkan/shaders/quad_array.comp | 37
-rw-r--r-- src/video_core/renderer_vulkan/shaders/quad_indexed.comp | 50
-rw-r--r-- src/video_core/renderer_vulkan/shaders/uint8.comp | 33
-rw-r--r-- src/video_core/renderer_vulkan/vk_blit_screen.cpp | 301
-rw-r--r-- src/video_core/renderer_vulkan/vk_blit_screen.h | 4
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 67
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.h | 10
-rw-r--r-- src/video_core/renderer_vulkan/vk_compute_pass.cpp | 327
-rw-r--r-- src/video_core/renderer_vulkan/vk_compute_pass.h | 4
-rw-r--r-- src/video_core/renderer_vulkan/vk_device.cpp | 110
-rw-r--r-- src/video_core/renderer_vulkan/vk_device.h | 34
-rw-r--r-- src/video_core/renderer_vulkan/vk_fence_manager.cpp | 7
-rw-r--r-- src/video_core/renderer_vulkan/vk_fence_manager.h | 6
-rw-r--r-- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 64
-rw-r--r-- src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 26
-rw-r--r-- src/video_core/renderer_vulkan/vk_image.cpp | 135
-rw-r--r-- src/video_core/renderer_vulkan/vk_image.h | 84
-rw-r--r-- src/video_core/renderer_vulkan/vk_memory_manager.cpp | 2
-rw-r--r-- src/video_core/renderer_vulkan/vk_memory_manager.h | 20
-rw-r--r-- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 28
-rw-r--r-- src/video_core/renderer_vulkan/vk_pipeline_cache.h | 16
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 700
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.h | 131
-rw-r--r-- src/video_core/renderer_vulkan/vk_renderpass_cache.cpp | 158
-rw-r--r-- src/video_core/renderer_vulkan/vk_renderpass_cache.h | 70
-rw-r--r-- src/video_core/renderer_vulkan/vk_sampler_cache.cpp | 83
-rw-r--r-- src/video_core/renderer_vulkan/vk_sampler_cache.h | 29
-rw-r--r-- src/video_core/renderer_vulkan/vk_scheduler.cpp | 79
-rw-r--r-- src/video_core/renderer_vulkan/vk_scheduler.h | 14
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 6
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_decompiler.h | 8
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_util.cpp | 11
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_util.h | 4
-rw-r--r-- src/video_core/renderer_vulkan/vk_state_tracker.cpp | 23
-rw-r--r-- src/video_core/renderer_vulkan/vk_state_tracker.h | 8
-rw-r--r-- src/video_core/renderer_vulkan/vk_stream_buffer.cpp | 20
-rw-r--r-- src/video_core/renderer_vulkan/vk_stream_buffer.h | 12
-rw-r--r-- src/video_core/renderer_vulkan/vk_texture_cache.cpp | 1473
-rw-r--r-- src/video_core/renderer_vulkan/vk_texture_cache.h | 328
-rw-r--r-- src/video_core/renderer_vulkan/vk_update_descriptor.h | 30
-rw-r--r-- src/video_core/renderer_vulkan/wrapper.cpp | 69
-rw-r--r-- src/video_core/renderer_vulkan/wrapper.h | 117
-rw-r--r-- src/video_core/sampler_cache.cpp | 21
-rw-r--r-- src/video_core/sampler_cache.h | 60
-rw-r--r-- src/video_core/shader/async_shaders.cpp | 9
-rw-r--r-- src/video_core/shader/async_shaders.h | 6
-rw-r--r-- src/video_core/shader/decode.cpp | 6
-rw-r--r-- src/video_core/shader/decode/image.cpp | 11
-rw-r--r-- src/video_core/shader/decode/texture.cpp | 56
-rw-r--r-- src/video_core/shader/node.h | 33
-rw-r--r-- src/video_core/shader/shader_ir.h | 18
-rw-r--r-- src/video_core/surface.cpp | 2
-rw-r--r-- src/video_core/surface.h | 152
-rw-r--r-- src/video_core/texture_cache/accelerated_swizzle.cpp | 70
-rw-r--r-- src/video_core/texture_cache/accelerated_swizzle.h | 45
-rw-r--r-- src/video_core/texture_cache/copy_params.h | 36
-rw-r--r-- src/video_core/texture_cache/decode_bc4.cpp | 97
-rw-r--r-- src/video_core/texture_cache/decode_bc4.h | 16
-rw-r--r-- src/video_core/texture_cache/descriptor_table.h | 82
-rw-r--r-- src/video_core/texture_cache/format_lookup_table.cpp | 380
-rw-r--r-- src/video_core/texture_cache/format_lookup_table.h | 42
-rw-r--r-- src/video_core/texture_cache/formatter.cpp | 95
-rw-r--r-- src/video_core/texture_cache/formatter.h | 263
-rw-r--r-- src/video_core/texture_cache/image_base.cpp | 216
-rw-r--r-- src/video_core/texture_cache/image_base.h | 83
-rw-r--r-- src/video_core/texture_cache/image_info.cpp | 189
-rw-r--r-- src/video_core/texture_cache/image_info.h | 38
-rw-r--r-- src/video_core/texture_cache/image_view_base.cpp | 41
-rw-r--r-- src/video_core/texture_cache/image_view_base.h | 47
-rw-r--r-- src/video_core/texture_cache/image_view_info.cpp | 88
-rw-r--r-- src/video_core/texture_cache/image_view_info.h | 50
-rw-r--r-- src/video_core/texture_cache/render_targets.h | 51
-rw-r--r-- src/video_core/texture_cache/samples_helper.h | 55
-rw-r--r-- src/video_core/texture_cache/slot_vector.h | 156
-rw-r--r-- src/video_core/texture_cache/surface_base.cpp | 299
-rw-r--r-- src/video_core/texture_cache/surface_base.h | 333
-rw-r--r-- src/video_core/texture_cache/surface_params.cpp | 445
-rw-r--r-- src/video_core/texture_cache/surface_params.h | 294
-rw-r--r-- src/video_core/texture_cache/surface_view.cpp | 27
-rw-r--r-- src/video_core/texture_cache/surface_view.h | 68
-rw-r--r-- src/video_core/texture_cache/texture_cache.h | 2397
-rw-r--r-- src/video_core/texture_cache/types.h | 140
-rw-r--r-- src/video_core/texture_cache/util.cpp | 1232
-rw-r--r-- src/video_core/texture_cache/util.h | 107
-rw-r--r-- src/video_core/textures/astc.cpp | 58
-rw-r--r-- src/video_core/textures/astc.h | 5
-rw-r--r-- src/video_core/textures/convert.cpp | 93
-rw-r--r-- src/video_core/textures/convert.h | 22
-rw-r--r-- src/video_core/textures/decoders.cpp | 249
-rw-r--r-- src/video_core/textures/decoders.h | 44
-rw-r--r-- src/video_core/textures/texture.cpp | 16
-rw-r--r-- src/video_core/textures/texture.h | 239
152 files changed, 10359 insertions, 8101 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index acf96f789..948e167c3 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -85,14 +85,10 @@ add_library(video_core STATIC
     renderer_opengl/gl_device.h
     renderer_opengl/gl_fence_manager.cpp
     renderer_opengl/gl_fence_manager.h
-    renderer_opengl/gl_framebuffer_cache.cpp
-    renderer_opengl/gl_framebuffer_cache.h
     renderer_opengl/gl_rasterizer.cpp
     renderer_opengl/gl_rasterizer.h
     renderer_opengl/gl_resource_manager.cpp
     renderer_opengl/gl_resource_manager.h
-    renderer_opengl/gl_sampler_cache.cpp
-    renderer_opengl/gl_sampler_cache.h
     renderer_opengl/gl_shader_cache.cpp
     renderer_opengl/gl_shader_cache.h
     renderer_opengl/gl_shader_decompiler.cpp
@@ -114,8 +110,10 @@ add_library(video_core STATIC
     renderer_opengl/maxwell_to_gl.h
     renderer_opengl/renderer_opengl.cpp
     renderer_opengl/renderer_opengl.h
-    renderer_opengl/utils.cpp
-    renderer_opengl/utils.h
+    renderer_opengl/util_shaders.cpp
+    renderer_opengl/util_shaders.h
+    renderer_vulkan/blit_image.cpp
+    renderer_vulkan/blit_image.h
     renderer_vulkan/fixed_pipeline_state.cpp
     renderer_vulkan/fixed_pipeline_state.h
     renderer_vulkan/maxwell_to_vk.cpp
@@ -142,8 +140,6 @@ add_library(video_core STATIC
     renderer_vulkan/vk_fence_manager.h
     renderer_vulkan/vk_graphics_pipeline.cpp
     renderer_vulkan/vk_graphics_pipeline.h
-    renderer_vulkan/vk_image.cpp
-    renderer_vulkan/vk_image.h
     renderer_vulkan/vk_master_semaphore.cpp
     renderer_vulkan/vk_master_semaphore.h
     renderer_vulkan/vk_memory_manager.cpp
@@ -154,12 +150,8 @@ add_library(video_core STATIC
     renderer_vulkan/vk_query_cache.h
     renderer_vulkan/vk_rasterizer.cpp
     renderer_vulkan/vk_rasterizer.h
-    renderer_vulkan/vk_renderpass_cache.cpp
-    renderer_vulkan/vk_renderpass_cache.h
     renderer_vulkan/vk_resource_pool.cpp
     renderer_vulkan/vk_resource_pool.h
-    renderer_vulkan/vk_sampler_cache.cpp
-    renderer_vulkan/vk_sampler_cache.h
     renderer_vulkan/vk_scheduler.cpp
     renderer_vulkan/vk_scheduler.h
     renderer_vulkan/vk_shader_decompiler.cpp
@@ -180,8 +172,6 @@ add_library(video_core STATIC
     renderer_vulkan/vk_update_descriptor.h
     renderer_vulkan/wrapper.cpp
     renderer_vulkan/wrapper.h
-    sampler_cache.cpp
-    sampler_cache.h
     shader_cache.h
     shader_notify.cpp
     shader_notify.h
@@ -238,19 +228,32 @@ add_library(video_core STATIC
     shader/transform_feedback.h
     surface.cpp
     surface.h
+    texture_cache/accelerated_swizzle.cpp
+    texture_cache/accelerated_swizzle.h
+    texture_cache/decode_bc4.cpp
+    texture_cache/decode_bc4.h
+    texture_cache/descriptor_table.h
+    texture_cache/formatter.cpp
+    texture_cache/formatter.h
     texture_cache/format_lookup_table.cpp
     texture_cache/format_lookup_table.h
-    texture_cache/surface_base.cpp
-    texture_cache/surface_base.h
-    texture_cache/surface_params.cpp
-    texture_cache/surface_params.h
-    texture_cache/surface_view.cpp
-    texture_cache/surface_view.h
+    texture_cache/image_base.cpp
+    texture_cache/image_base.h
+    texture_cache/image_info.cpp
+    texture_cache/image_info.h
+    texture_cache/image_view_base.cpp
+    texture_cache/image_view_base.h
+    texture_cache/image_view_info.cpp
+    texture_cache/image_view_info.h
+    texture_cache/render_targets.h
+    texture_cache/samples_helper.h
+    texture_cache/slot_vector.h
     texture_cache/texture_cache.h
+    texture_cache/types.h
+    texture_cache/util.cpp
+    texture_cache/util.h
     textures/astc.cpp
     textures/astc.h
-    textures/convert.cpp
-    textures/convert.h
     textures/decoders.cpp
     textures/decoders.h
     textures/texture.cpp
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 38961f3fd..83b9ee871 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -118,20 +118,17 @@ public:
     /// Prepares the buffer cache for data uploading
     /// @param max_size Maximum number of bytes that will be uploaded
     /// @return True when a stream buffer invalidation was required, false otherwise
-    bool Map(std::size_t max_size) {
+    void Map(std::size_t max_size) {
         std::lock_guard lock{mutex};
 
-        bool invalidated;
-        std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
+        std::tie(buffer_ptr, buffer_offset_base) = stream_buffer.Map(max_size, 4);
         buffer_offset = buffer_offset_base;
-
-        return invalidated;
     }
 
     /// Finishes the upload stream
     void Unmap() {
         std::lock_guard lock{mutex};
-        stream_buffer->Unmap(buffer_offset - buffer_offset_base);
+        stream_buffer.Unmap(buffer_offset - buffer_offset_base);
     }
 
     /// Function called at the end of each frame, inteded for deferred operations
@@ -261,9 +258,9 @@ public:
 protected:
     explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
                          Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                         std::unique_ptr<StreamBuffer> stream_buffer_)
+                         StreamBuffer& stream_buffer_)
         : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_},
-          stream_buffer{std::move(stream_buffer_)}, stream_buffer_handle{stream_buffer->Handle()} {}
+          stream_buffer{stream_buffer_} {}
 
     ~BufferCache() = default;
 
@@ -441,7 +438,7 @@ private:
 
         buffer_ptr += size;
         buffer_offset += size;
-        return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()};
+        return BufferInfo{stream_buffer.Handle(), uploaded_offset, stream_buffer.Address()};
     }
 
     void AlignBuffer(std::size_t alignment) {
@@ -567,9 +564,7 @@ private:
     VideoCore::RasterizerInterface& rasterizer;
     Tegra::MemoryManager& gpu_memory;
     Core::Memory::Memory& cpu_memory;
-
-    std::unique_ptr<StreamBuffer> stream_buffer;
-    BufferType stream_buffer_handle;
+    StreamBuffer& stream_buffer;
 
     u8* buffer_ptr = nullptr;
     u64 buffer_offset = 0;
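
The stream buffer is now borrowed by reference instead of owned through std::unique_ptr, and the cached stream_buffer_handle member disappears because the handle can be queried directly. A minimal sketch of the resulting pattern, with hypothetical names (not the real yuzu classes):

// Hypothetical sketch of the borrowed-member pattern.
class ExampleStreamBuffer {};

class ExampleCache {
public:
    // The caller (e.g. the renderer) owns the stream buffer and must keep
    // it alive for the lifetime of the cache.
    explicit ExampleCache(ExampleStreamBuffer& stream_buffer_)
        : stream_buffer{stream_buffer_} {}

private:
    ExampleStreamBuffer& stream_buffer; // was std::unique_ptr<StreamBuffer>
};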
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index 66e21ce9c..aa8c9f9de 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -9,7 +9,7 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
-#include "video_core/texture_cache/surface_params.h"
+#include "video_core/textures/decoders.h"
 
 extern "C" {
 #include <libswscale/swscale.h>
@@ -105,9 +105,9 @@ void Vic::Execute() {
         const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1,
                                                         block_height, 0);
         std::vector<u8> swizzled_data(size);
-        Tegra::Texture::CopySwizzledData(frame->width, frame->height, 1, 4, 4,
-                                         swizzled_data.data(), converted_frame_buffer.get(),
-                                         false, block_height, 0, 1);
+        Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4,
+                                       frame->width, 4, swizzled_data.data(),
+                                       converted_frame_buffer.get(), block_height, 0, 0);
 
         gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size);
         gpu.Maxwell3D().OnMemoryWrite();
diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp
index b06c32c84..1619d8664 100644
--- a/src/video_core/compatible_formats.cpp
+++ b/src/video_core/compatible_formats.cpp
@@ -3,9 +3,9 @@
 // Refer to the license.txt file included.
 
 #include <array>
-#include <bitset>
 #include <cstddef>
 
+#include "common/common_types.h"
 #include "video_core/compatible_formats.h"
 #include "video_core/surface.h"
 
@@ -13,23 +13,25 @@ namespace VideoCore::Surface {
 
 namespace {
 
+using Table = std::array<std::array<u64, 2>, MaxPixelFormat>;
+
 // Compatibility table taken from Table 3.X.2 in:
 // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt
 
-constexpr std::array VIEW_CLASS_128_BITS = {
+constexpr std::array VIEW_CLASS_128_BITS{
     PixelFormat::R32G32B32A32_FLOAT,
     PixelFormat::R32G32B32A32_UINT,
     PixelFormat::R32G32B32A32_SINT,
 };
 
-constexpr std::array VIEW_CLASS_96_BITS = {
+constexpr std::array VIEW_CLASS_96_BITS{
     PixelFormat::R32G32B32_FLOAT,
 };
 // Missing formats:
 // PixelFormat::RGB32UI,
 // PixelFormat::RGB32I,
 
-constexpr std::array VIEW_CLASS_64_BITS = {
+constexpr std::array VIEW_CLASS_64_BITS{
     PixelFormat::R32G32_FLOAT,       PixelFormat::R32G32_UINT,
     PixelFormat::R32G32_SINT,        PixelFormat::R16G16B16A16_FLOAT,
     PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
@@ -38,7 +40,7 @@ constexpr std::array VIEW_CLASS_64_BITS = {
 
 // TODO: How should we handle 48 bits?
 
-constexpr std::array VIEW_CLASS_32_BITS = {
+constexpr std::array VIEW_CLASS_32_BITS{
     PixelFormat::R16G16_FLOAT,       PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT,
     PixelFormat::A2B10G10R10_UNORM,  PixelFormat::R16G16_UINT,     PixelFormat::R32_UINT,
     PixelFormat::R16G16_SINT,        PixelFormat::R32_SINT,        PixelFormat::A8B8G8R8_UNORM,
@@ -50,43 +52,105 @@ constexpr std::array VIEW_CLASS_32_BITS = {
 
 // TODO: How should we handle 24 bits?
 
-constexpr std::array VIEW_CLASS_16_BITS = {
+constexpr std::array VIEW_CLASS_16_BITS{
     PixelFormat::R16_FLOAT,  PixelFormat::R8G8_UINT,  PixelFormat::R16_UINT,
     PixelFormat::R16_SINT,   PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM,
     PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM,  PixelFormat::R8G8_SINT,
 };
 
-constexpr std::array VIEW_CLASS_8_BITS = {
+constexpr std::array VIEW_CLASS_8_BITS{
     PixelFormat::R8_UINT,
     PixelFormat::R8_UNORM,
     PixelFormat::R8_SINT,
     PixelFormat::R8_SNORM,
 };
 
-constexpr std::array VIEW_CLASS_RGTC1_RED = {
+constexpr std::array VIEW_CLASS_RGTC1_RED{
     PixelFormat::BC4_UNORM,
     PixelFormat::BC4_SNORM,
 };
 
-constexpr std::array VIEW_CLASS_RGTC2_RG = {
+constexpr std::array VIEW_CLASS_RGTC2_RG{
     PixelFormat::BC5_UNORM,
     PixelFormat::BC5_SNORM,
 };
 
-constexpr std::array VIEW_CLASS_BPTC_UNORM = {
+constexpr std::array VIEW_CLASS_BPTC_UNORM{
     PixelFormat::BC7_UNORM,
     PixelFormat::BC7_SRGB,
 };
 
-constexpr std::array VIEW_CLASS_BPTC_FLOAT = {
+constexpr std::array VIEW_CLASS_BPTC_FLOAT{
     PixelFormat::BC6H_SFLOAT,
     PixelFormat::BC6H_UFLOAT,
 };
 
+constexpr std::array VIEW_CLASS_ASTC_4x4_RGBA{
+    PixelFormat::ASTC_2D_4X4_UNORM,
+    PixelFormat::ASTC_2D_4X4_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_ASTC_5x4_RGBA{
+    PixelFormat::ASTC_2D_5X4_UNORM,
+    PixelFormat::ASTC_2D_5X4_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_ASTC_5x5_RGBA{
+    PixelFormat::ASTC_2D_5X5_UNORM,
+    PixelFormat::ASTC_2D_5X5_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_ASTC_6x5_RGBA{
+    PixelFormat::ASTC_2D_6X5_UNORM,
+    PixelFormat::ASTC_2D_6X5_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_ASTC_6x6_RGBA{
+    PixelFormat::ASTC_2D_6X6_UNORM,
+    PixelFormat::ASTC_2D_6X6_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_ASTC_8x5_RGBA{
+    PixelFormat::ASTC_2D_8X5_UNORM,
+    PixelFormat::ASTC_2D_8X5_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_ASTC_8x8_RGBA{
+    PixelFormat::ASTC_2D_8X8_UNORM,
+    PixelFormat::ASTC_2D_8X8_SRGB,
+};
+
+// Missing formats:
+// PixelFormat::ASTC_2D_10X5_UNORM
+// PixelFormat::ASTC_2D_10X5_SRGB
+
+// Missing formats:
+// PixelFormat::ASTC_2D_10X6_UNORM
+// PixelFormat::ASTC_2D_10X6_SRGB
+
+constexpr std::array VIEW_CLASS_ASTC_10x8_RGBA{
+    PixelFormat::ASTC_2D_10X8_UNORM,
+    PixelFormat::ASTC_2D_10X8_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_ASTC_10x10_RGBA{
+    PixelFormat::ASTC_2D_10X10_UNORM,
+    PixelFormat::ASTC_2D_10X10_SRGB,
+};
+
+// Missing formats
+// ASTC_2D_12X10_UNORM,
+// ASTC_2D_12X10_SRGB,
+
+constexpr std::array VIEW_CLASS_ASTC_12x12_RGBA{
+    PixelFormat::ASTC_2D_12X12_UNORM,
+    PixelFormat::ASTC_2D_12X12_SRGB,
+};
+
 // Compatibility table taken from Table 4.X.1 in:
 // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt
 
-constexpr std::array COPY_CLASS_128_BITS = {
+constexpr std::array COPY_CLASS_128_BITS{
     PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT,
     PixelFormat::BC2_UNORM,         PixelFormat::BC2_SRGB,           PixelFormat::BC3_UNORM,
     PixelFormat::BC3_SRGB,          PixelFormat::BC5_UNORM,          PixelFormat::BC5_SNORM,
@@ -97,7 +161,7 @@ constexpr std::array COPY_CLASS_128_BITS = {
 // PixelFormat::RGBA32I
 // COMPRESSED_RG_RGTC2
 
-constexpr std::array COPY_CLASS_64_BITS = {
+constexpr std::array COPY_CLASS_64_BITS{
     PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT,
     PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
     PixelFormat::R16G16B16A16_SINT,  PixelFormat::R32G32_UINT,
@@ -110,32 +174,36 @@ constexpr std::array COPY_CLASS_64_BITS = {
 // COMPRESSED_RGBA_S3TC_DXT1_EXT
 // COMPRESSED_SIGNED_RED_RGTC1
 
-void Enable(FormatCompatibility::Table& compatiblity, size_t format_a, size_t format_b) {
-    compatiblity[format_a][format_b] = true;
-    compatiblity[format_b][format_a] = true;
+constexpr void Enable(Table& table, size_t format_a, size_t format_b) {
+    table[format_a][format_b / 64] |= u64(1) << (format_b % 64);
+    table[format_b][format_a / 64] |= u64(1) << (format_a % 64);
 }
 
-void Enable(FormatCompatibility::Table& compatibility, PixelFormat format_a, PixelFormat format_b) {
-    Enable(compatibility, static_cast<size_t>(format_a), static_cast<size_t>(format_b));
+constexpr void Enable(Table& table, PixelFormat format_a, PixelFormat format_b) {
+    Enable(table, static_cast<size_t>(format_a), static_cast<size_t>(format_b));
 }
 
 template <typename Range>
-void EnableRange(FormatCompatibility::Table& compatibility, const Range& range) {
+constexpr void EnableRange(Table& table, const Range& range) {
     for (auto it_a = range.begin(); it_a != range.end(); ++it_a) {
         for (auto it_b = it_a; it_b != range.end(); ++it_b) {
-            Enable(compatibility, *it_a, *it_b);
+            Enable(table, *it_a, *it_b);
         }
     }
 }
 
-} // Anonymous namespace
+constexpr bool IsSupported(const Table& table, PixelFormat format_a, PixelFormat format_b) {
+    const size_t a = static_cast<size_t>(format_a);
+    const size_t b = static_cast<size_t>(format_b);
+    return ((table[a][b / 64] >> (b % 64)) & 1) != 0;
+}
 
-FormatCompatibility::FormatCompatibility() {
+constexpr Table MakeViewTable() {
+    Table view{};
     for (size_t i = 0; i < MaxPixelFormat; ++i) {
         // Identity is allowed
         Enable(view, i, i);
     }
-
     EnableRange(view, VIEW_CLASS_128_BITS);
     EnableRange(view, VIEW_CLASS_96_BITS);
     EnableRange(view, VIEW_CLASS_64_BITS);
@@ -146,10 +214,36 @@ FormatCompatibility::FormatCompatibility() {
     EnableRange(view, VIEW_CLASS_RGTC2_RG);
     EnableRange(view, VIEW_CLASS_BPTC_UNORM);
     EnableRange(view, VIEW_CLASS_BPTC_FLOAT);
+    EnableRange(view, VIEW_CLASS_ASTC_4x4_RGBA);
+    EnableRange(view, VIEW_CLASS_ASTC_5x4_RGBA);
+    EnableRange(view, VIEW_CLASS_ASTC_5x5_RGBA);
+    EnableRange(view, VIEW_CLASS_ASTC_6x5_RGBA);
+    EnableRange(view, VIEW_CLASS_ASTC_6x6_RGBA);
+    EnableRange(view, VIEW_CLASS_ASTC_8x5_RGBA);
+    EnableRange(view, VIEW_CLASS_ASTC_8x8_RGBA);
+    EnableRange(view, VIEW_CLASS_ASTC_10x8_RGBA);
+    EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA);
+    EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA);
+    return view;
+}
 
-    copy = view;
+constexpr Table MakeCopyTable() {
+    Table copy = MakeViewTable();
     EnableRange(copy, COPY_CLASS_128_BITS);
     EnableRange(copy, COPY_CLASS_64_BITS);
+    return copy;
+}
+
+} // Anonymous namespace
+
+bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b) {
+    static constexpr Table TABLE = MakeViewTable();
+    return IsSupported(TABLE, format_a, format_b);
+}
+
+bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b) {
+    static constexpr Table TABLE = MakeCopyTable();
+    return IsSupported(TABLE, format_a, format_b);
 }
 
 } // namespace VideoCore::Surface
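
The new Table packs each format's compatibility row into two u64 words, which keeps the tables buildable at compile time (std::bitset could not be mutated in a constexpr context here) and makes each lookup a shift and a mask. A self-contained sketch of the same bit-packing scheme, with an illustrative format count rather than yuzu's MaxPixelFormat:

#include <array>
#include <cstddef>
#include <cstdint>

constexpr std::size_t NumFormats = 128; // illustrative; yuzu uses MaxPixelFormat
using Row = std::array<std::uint64_t, (NumFormats + 63) / 64>;
using PackedTable = std::array<Row, NumFormats>;

// Set the bit for (a, b) in both directions, since compatibility is symmetric.
constexpr void Enable(PackedTable& table, std::size_t a, std::size_t b) {
    table[a][b / 64] |= std::uint64_t{1} << (b % 64);
    table[b][a / 64] |= std::uint64_t{1} << (a % 64);
}

// Test bit b of row a: one shift, one mask.
constexpr bool IsSupported(const PackedTable& table, std::size_t a, std::size_t b) {
    return ((table[a][b / 64] >> (b % 64)) & 1) != 0;
}

constexpr PackedTable MakeExampleTable() {
    PackedTable table{};
    Enable(table, 3, 7); // mark formats 3 and 7 as mutually compatible
    return table;
}

static_assert(IsSupported(MakeExampleTable(), 7, 3));  // symmetric lookup
static_assert(!IsSupported(MakeExampleTable(), 3, 4)); // unrelated formats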
diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h
index 51766349b..b5eb03bea 100644
--- a/src/video_core/compatible_formats.h
+++ b/src/video_core/compatible_formats.h
@@ -4,31 +4,12 @@
 
 #pragma once
 
-#include <array>
-#include <bitset>
-#include <cstddef>
-
 #include "video_core/surface.h"
 
 namespace VideoCore::Surface {
 
-class FormatCompatibility {
-public:
-    using Table = std::array<std::bitset<MaxPixelFormat>, MaxPixelFormat>;
-
-    explicit FormatCompatibility();
-
-    bool TestView(PixelFormat format_a, PixelFormat format_b) const noexcept {
-        return view[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
-    }
-
-    bool TestCopy(PixelFormat format_a, PixelFormat format_b) const noexcept {
-        return copy[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
-    }
+bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b);
 
-private:
-    Table view;
-    Table copy;
-};
+bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b);
 
 } // namespace VideoCore::Surface
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp
index 2faa6ef0e..b1eaac00c 100644
--- a/src/video_core/dirty_flags.cpp
+++ b/src/video_core/dirty_flags.cpp
@@ -16,6 +16,9 @@ namespace VideoCommon::Dirty {
 using Tegra::Engines::Maxwell3D;
 
 void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) {
+    FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors);
+    FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors);
+
     static constexpr std::size_t num_per_rt = NUM(rt[0]);
     static constexpr std::size_t begin = OFF(rt);
     static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets;
@@ -23,6 +26,10 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl
         FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt);
     }
     FillBlock(tables[1], begin, num, RenderTargets);
+    FillBlock(tables[0], OFF(render_area), NUM(render_area), RenderTargets);
+
+    tables[0][OFF(rt_control)] = RenderTargets;
+    tables[1][OFF(rt_control)] = RenderTargetControl;
 
     static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets};
     for (std::size_t i = 0; i < std::size(zeta_flags); ++i) {
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h
index 3f6c1d83a..875527ddd 100644
--- a/src/video_core/dirty_flags.h
+++ b/src/video_core/dirty_flags.h
@@ -16,7 +16,10 @@ namespace VideoCommon::Dirty {
 enum : u8 {
     NullEntry = 0,
 
+    Descriptors,
+
     RenderTargets,
+    RenderTargetControl,
     ColorBuffer0,
     ColorBuffer1,
     ColorBuffer2,
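
For context, dirty-flag tables map each register index to a flag id that gets marked when the register is written. A rough sketch of how such a table is consumed, with simplified types (the real Maxwell3D::DirtyState differs):

#include <array>
#include <bitset>
#include <cstddef>
#include <cstdint>

constexpr std::size_t NUM_REGS = 0x300; // illustrative register count
using FlagTable = std::array<std::uint8_t, NUM_REGS>;

struct DirtyStateSketch {
    std::array<FlagTable, 2> tables{}; // up to two flag ids per register index
    std::bitset<256> flags;            // one bit per dirty-flag id

    void OnRegisterWrite(std::size_t reg_index) {
        // Unused slots hold NullEntry (0), so marking them is a harmless no-op.
        flags[tables[0][reg_index]] = true;
        flags[tables[1][reg_index]] = true;
    }
};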
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 4293d676c..a01d334ad 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -10,7 +10,11 @@
 
 namespace Tegra::Engines {
 
-Fermi2D::Fermi2D() = default;
+Fermi2D::Fermi2D() {
+    // Nvidia's OpenGL driver seems to assume these values
+    regs.src.depth = 1;
+    regs.dst.depth = 1;
+}
 
 Fermi2D::~Fermi2D() = default;
 
@@ -21,78 +25,43 @@ void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
 void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
     ASSERT_MSG(method < Regs::NUM_REGS,
                "Invalid Fermi2D register, increase the size of the Regs structure");
-
     regs.reg_array[method] = method_argument;
 
-    switch (method) {
-    // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit,
-    // so trigger on the second 32-bit write.
-    case FERMI2D_REG_INDEX(blit_src_y) + 1: {
-        HandleSurfaceCopy();
-        break;
-    }
+    if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) {
+        Blit();
     }
 }
 
 void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) {
-    for (std::size_t i = 0; i < amount; i++) {
-        CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
+    for (u32 i = 0; i < amount; ++i) {
+        CallMethod(method, base_start[i], methods_pending - i <= 1);
     }
 }
 
-static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) {
-    const u32 line_a = src_2 - src_1;
-    const u32 line_b = dst_2 - dst_1;
-    const u32 excess = std::max<s32>(0, line_a - src_line + src_1);
-    return {line_b - (excess * line_b) / line_a, excess};
-}
-
-void Fermi2D::HandleSurfaceCopy() {
-    LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}", regs.operation);
+void Fermi2D::Blit() {
+    LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
+              regs.src.Address(), regs.dst.Address());
 
-    // TODO(Subv): Only raw copies are implemented.
-    ASSERT(regs.operation == Operation::SrcCopy);
+    UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy");
+    UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero");
+    UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero");
+    UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one");
+    UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
 
-    const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)};
-    const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)};
-    u32 src_blit_x2, src_blit_y2;
-    if (regs.blit_control.origin == Origin::Corner) {
-        src_blit_x2 =
-            static_cast<u32>((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32);
-        src_blit_y2 =
-            static_cast<u32>((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32);
-    } else {
-        src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width);
-        src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height);
-    }
-    u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width;
-    u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height;
-    const auto [new_dst_w, src_excess_x] =
-        DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width);
-    const auto [new_dst_h, src_excess_y] =
-        DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height);
-    dst_blit_x2 = new_dst_w + regs.blit_dst_x;
-    src_blit_x2 = src_blit_x2 - src_excess_x;
-    dst_blit_y2 = new_dst_h + regs.blit_dst_y;
-    src_blit_y2 = src_blit_y2 - src_excess_y;
-    const auto [new_src_w, dst_excess_x] =
-        DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width);
-    const auto [new_src_h, dst_excess_y] =
-        DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height);
-    src_blit_x2 = new_src_w + src_blit_x1;
-    dst_blit_x2 = dst_blit_x2 - dst_excess_x;
-    src_blit_y2 = new_src_h + src_blit_y1;
-    dst_blit_y2 = dst_blit_y2 - dst_excess_y;
-    const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
-    const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2,
-                                          dst_blit_y2};
-    const Config copy_config{
+    const auto& args = regs.pixels_from_memory;
+    const Config config{
         .operation = regs.operation,
-        .filter = regs.blit_control.filter,
-        .src_rect = src_rect,
-        .dst_rect = dst_rect,
+        .filter = args.sample_mode.filter,
+        .dst_x0 = args.dst_x0,
+        .dst_y0 = args.dst_y0,
+        .dst_x1 = args.dst_x0 + args.dst_width,
+        .dst_y1 = args.dst_y0 + args.dst_height,
+        .src_x0 = static_cast<s32>(args.src_x0 >> 32),
+        .src_y0 = static_cast<s32>(args.src_y0 >> 32),
+        .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
+        .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
     };
-    if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) {
+    if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) {
         UNIMPLEMENTED();
     }
 }
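
The src_x0/src_y0 registers and the du_dx/dv_dy deltas above are 32.32 fixed-point values, so the source end coordinates come from accumulating the per-pixel delta over the destination extent and shifting away the fraction. A small sketch of that arithmetic (the helper name is illustrative):

#include <cstdint>

// 32.32 fixed-point: upper 32 bits integer, lower 32 bits fraction.
// Mirrors static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32).
constexpr std::int32_t FixedPointEnd(std::int64_t delta, std::int32_t dst_extent,
                                     std::int64_t src_origin) {
    return static_cast<std::int32_t>((delta * dst_extent + src_origin) >> 32);
}

static_assert(FixedPointEnd(std::int64_t{1} << 32, 100, 0) == 100); // 1.0 step: 1:1 blit
static_assert(FixedPointEnd(std::int64_t{1} << 31, 100, 0) == 50);  // 0.5 step: 2x upscale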
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 0909709ec..81522988e 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -53,8 +53,8 @@ public:
     };
 
     enum class Filter : u32 {
-        PointSample = 0, // Nearest
-        Linear = 1,
+        Point = 0,
+        Bilinear = 1,
     };
 
     enum class Operation : u32 {
@@ -67,88 +67,235 @@ public:
         BlendPremult = 6,
     };
 
-    struct Regs {
-        static constexpr std::size_t NUM_REGS = 0x258;
+    enum class MemoryLayout : u32 {
+        BlockLinear = 0,
+        Pitch = 1,
+    };
 
-        struct Surface {
-            RenderTargetFormat format;
-            BitField<0, 1, u32> linear;
-            union {
-                BitField<0, 4, u32> block_width;
-                BitField<4, 4, u32> block_height;
-                BitField<8, 4, u32> block_depth;
-            };
-            u32 depth;
-            u32 layer;
-            u32 pitch;
-            u32 width;
-            u32 height;
-            u32 address_high;
-            u32 address_low;
-
-            GPUVAddr Address() const {
-                return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
-                                             address_low);
-            }
-
-            u32 BlockWidth() const {
-                return block_width.Value();
-            }
-
-            u32 BlockHeight() const {
-                return block_height.Value();
-            }
-
-            u32 BlockDepth() const {
-                return block_depth.Value();
-            }
-        };
-        static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
+    enum class CpuIndexWrap : u32 {
+        Wrap = 0,
+        NoWrap = 1,
+    };
 
+    struct Surface {
+        RenderTargetFormat format;
+        MemoryLayout linear;
         union {
-            struct {
-                INSERT_UNION_PADDING_WORDS(0x80);
+            BitField<0, 4, u32> block_width;
+            BitField<4, 4, u32> block_height;
+            BitField<8, 4, u32> block_depth;
+        };
+        u32 depth;
+        u32 layer;
+        u32 pitch;
+        u32 width;
+        u32 height;
+        u32 addr_upper;
+        u32 addr_lower;
+
+        [[nodiscard]] constexpr GPUVAddr Address() const noexcept {
+            return (static_cast<GPUVAddr>(addr_upper) << 32) | static_cast<GPUVAddr>(addr_lower);
+        }
+    };
+    static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
 
-            Surface dst;
+    enum class SectorPromotion : u32 {
+        NoPromotion = 0,
+        PromoteTo2V = 1,
+        PromoteTo2H = 2,
+        PromoteTo4 = 3,
+    };
+
+    enum class NumTpcs : u32 {
+        All = 0,
+        One = 1,
+    };
 
-            INSERT_UNION_PADDING_WORDS(2);
+    enum class RenderEnableMode : u32 {
+        False = 0,
+        True = 1,
+        Conditional = 2,
+        RenderIfEqual = 3,
+        RenderIfNotEqual = 4,
+    };
 
-            Surface src;
+    enum class ColorKeyFormat : u32 {
+        A16R56G6B5 = 0,
+        A1R5G55B5 = 1,
+        A8R8G8B8 = 2,
+        A2R10G10B10 = 3,
+        Y8 = 4,
+        Y16 = 5,
+        Y32 = 6,
+    };
 
-            INSERT_UNION_PADDING_WORDS(0x15);
+    union Beta4 {
+        BitField<0, 8, u32> b;
+        BitField<8, 8, u32> g;
+        BitField<16, 8, u32> r;
+        BitField<24, 8, u32> a;
+    };
 
-            Operation operation;
+    struct Point {
+        u32 x;
+        u32 y;
+    };
 
-            INSERT_UNION_PADDING_WORDS(0x177);
+    enum class PatternSelect : u32 {
+        MonoChrome8x8 = 0,
+        MonoChrome64x1 = 1,
+        MonoChrome1x64 = 2,
+        Color = 3,
+    };
 
+    enum class NotifyType : u32 {
+        WriteOnly = 0,
+        WriteThenAwaken = 1,
+    };
+
+    enum class MonochromePatternColorFormat : u32 {
+        A8X8R8G6B5 = 0,
+        A1R5G5B5 = 1,
+        A8R8G8B8 = 2,
+        A8Y8 = 3,
+        A8X8Y16 = 4,
+        Y32 = 5,
+    };
+
+    enum class MonochromePatternFormat : u32 {
+        CGA6_M1 = 0,
+        LE_M1 = 1,
+    };
+
+    union Regs {
+        static constexpr std::size_t NUM_REGS = 0x258;
+        struct {
+            u32 object;
+            INSERT_UNION_PADDING_WORDS(0x3F);
+            u32 no_operation;
+            NotifyType notify;
+            INSERT_UNION_PADDING_WORDS(0x2);
+            u32 wait_for_idle;
+            INSERT_UNION_PADDING_WORDS(0xB);
+            u32 pm_trigger;
+            INSERT_UNION_PADDING_WORDS(0xF);
+            u32 context_dma_notify;
+            u32 dst_context_dma;
+            u32 src_context_dma;
+            u32 semaphore_context_dma;
+            INSERT_UNION_PADDING_WORDS(0x1C);
+            Surface dst;
+            CpuIndexWrap pixels_from_cpu_index_wrap;
+            u32 kind2d_check_enable;
+            Surface src;
+            SectorPromotion pixels_from_memory_sector_promotion;
+            INSERT_UNION_PADDING_WORDS(0x1);
+            NumTpcs num_tpcs;
+            u32 render_enable_addr_upper;
+            u32 render_enable_addr_lower;
+            RenderEnableMode render_enable_mode;
+            INSERT_UNION_PADDING_WORDS(0x4);
+            u32 clip_x0;
+            u32 clip_y0;
+            u32 clip_width;
+            u32 clip_height;
+            BitField<0, 1, u32> clip_enable;
+            BitField<0, 3, ColorKeyFormat> color_key_format;
+            u32 color_key;
+            BitField<0, 1, u32> color_key_enable;
+            BitField<0, 8, u32> rop;
+            u32 beta1;
+            Beta4 beta4;
+            Operation operation;
+            union {
+                BitField<0, 6, u32> x;
+                BitField<8, 6, u32> y;
+            } pattern_offset;
+            BitField<0, 2, PatternSelect> pattern_select;
+            INSERT_UNION_PADDING_WORDS(0xC);
+            struct {
+                BitField<0, 3, MonochromePatternColorFormat> color_format;
+                BitField<0, 1, MonochromePatternFormat> format;
+                u32 color0;
+                u32 color1;
+                u32 pattern0;
+                u32 pattern1;
+            } monochrome_pattern;
+            struct {
+                std::array<u32, 0x40> X8R8G8B8;
+                std::array<u32, 0x20> R5G6B5;
+                std::array<u32, 0x20> X1R5G5B5;
+                std::array<u32, 0x10> Y8;
+            } color_pattern;
+            INSERT_UNION_PADDING_WORDS(0x10);
+            struct {
+                u32 prim_mode;
+                u32 prim_color_format;
+                u32 prim_color;
+                u32 line_tie_break_bits;
+                INSERT_UNION_PADDING_WORDS(0x14);
+                u32 prim_point_xy;
+                INSERT_UNION_PADDING_WORDS(0x7);
+                std::array<Point, 0x40> prim_point;
+            } render_solid;
+            struct {
+                u32 data_type;
+                u32 color_format;
+                u32 index_format;
+                u32 mono_format;
+                u32 wrap;
+                u32 color0;
+                u32 color1;
+                u32 mono_opacity;
+                INSERT_UNION_PADDING_WORDS(0x6);
+                u32 src_width;
+                u32 src_height;
+                u32 dx_du_frac;
+                u32 dx_du_int;
+                u32 dx_dv_frac;
+                u32 dy_dv_int;
+                u32 dst_x0_frac;
+                u32 dst_x0_int;
+                u32 dst_y0_frac;
+                u32 dst_y0_int;
+                u32 data;
+            } pixels_from_cpu;
+            INSERT_UNION_PADDING_WORDS(0x3);
+            u32 big_endian_control;
+            INSERT_UNION_PADDING_WORDS(0x3);
+            struct {
+                BitField<0, 3, u32> block_shape;
+                BitField<0, 5, u32> corral_size;
+                BitField<0, 1, u32> safe_overlap;
                 union {
-                    u32 raw;
                     BitField<0, 1, Origin> origin;
                     BitField<4, 1, Filter> filter;
-                } blit_control;
-
+                } sample_mode;
                 INSERT_UNION_PADDING_WORDS(0x8);
-
-                u32 blit_dst_x;
-                u32 blit_dst_y;
-                u32 blit_dst_width;
-                u32 blit_dst_height;
-                u64 blit_du_dx;
-                u64 blit_dv_dy;
-                u64 blit_src_x;
-                u64 blit_src_y;
-
-                INSERT_UNION_PADDING_WORDS(0x21);
-            };
-            std::array<u32, NUM_REGS> reg_array;
+                s32 dst_x0;
+                s32 dst_y0;
+                s32 dst_width;
+                s32 dst_height;
+                s64 du_dx;
+                s64 dv_dy;
+                s64 src_x0;
+                s64 src_y0;
+            } pixels_from_memory;
         };
+        std::array<u32, NUM_REGS> reg_array;
     } regs{};
 
     struct Config {
-        Operation operation{};
-        Filter filter{};
-        Common::Rectangle<u32> src_rect;
-        Common::Rectangle<u32> dst_rect;
+        Operation operation;
+        Filter filter;
+        s32 dst_x0;
+        s32 dst_y0;
+        s32 dst_x1;
+        s32 dst_y1;
+        s32 src_x0;
+        s32 src_y0;
+        s32 src_x1;
+        s32 src_y1;
     };
 
 private:
@@ -156,25 +303,49 @@ private:
 
     /// Performs the copy from the source surface to the destination surface as configured in the
     /// registers.
-    void HandleSurfaceCopy();
+    void Blit();
 };
 
 #define ASSERT_REG_POSITION(field_name, position) \
-    static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4, \
+    static_assert(offsetof(Fermi2D::Regs, field_name) == position, \
                   "Field " #field_name " has invalid position")
 
-ASSERT_REG_POSITION(dst, 0x80);
-ASSERT_REG_POSITION(src, 0x8C);
-ASSERT_REG_POSITION(operation, 0xAB);
-ASSERT_REG_POSITION(blit_control, 0x223);
-ASSERT_REG_POSITION(blit_dst_x, 0x22c);
-ASSERT_REG_POSITION(blit_dst_y, 0x22d);
-ASSERT_REG_POSITION(blit_dst_width, 0x22e);
-ASSERT_REG_POSITION(blit_dst_height, 0x22f);
-ASSERT_REG_POSITION(blit_du_dx, 0x230);
-ASSERT_REG_POSITION(blit_dv_dy, 0x232);
-ASSERT_REG_POSITION(blit_src_x, 0x234);
-ASSERT_REG_POSITION(blit_src_y, 0x236);
+ASSERT_REG_POSITION(object, 0x0);
+ASSERT_REG_POSITION(no_operation, 0x100);
+ASSERT_REG_POSITION(notify, 0x104);
+ASSERT_REG_POSITION(wait_for_idle, 0x110);
+ASSERT_REG_POSITION(pm_trigger, 0x140);
+ASSERT_REG_POSITION(context_dma_notify, 0x180);
+ASSERT_REG_POSITION(dst_context_dma, 0x184);
+ASSERT_REG_POSITION(src_context_dma, 0x188);
+ASSERT_REG_POSITION(semaphore_context_dma, 0x18C);
+ASSERT_REG_POSITION(dst, 0x200);
+ASSERT_REG_POSITION(pixels_from_cpu_index_wrap, 0x228);
+ASSERT_REG_POSITION(kind2d_check_enable, 0x22C);
+ASSERT_REG_POSITION(src, 0x230);
+ASSERT_REG_POSITION(pixels_from_memory_sector_promotion, 0x258);
+ASSERT_REG_POSITION(num_tpcs, 0x260);
+ASSERT_REG_POSITION(render_enable_addr_upper, 0x264);
+ASSERT_REG_POSITION(render_enable_addr_lower, 0x268);
+ASSERT_REG_POSITION(clip_x0, 0x280);
+ASSERT_REG_POSITION(clip_y0, 0x284);
+ASSERT_REG_POSITION(clip_width, 0x288);
+ASSERT_REG_POSITION(clip_height, 0x28c);
+ASSERT_REG_POSITION(clip_enable, 0x290);
+ASSERT_REG_POSITION(color_key_format, 0x294);
+ASSERT_REG_POSITION(color_key, 0x298);
+ASSERT_REG_POSITION(rop, 0x2A0);
+ASSERT_REG_POSITION(beta1, 0x2A4);
+ASSERT_REG_POSITION(beta4, 0x2A8);
+ASSERT_REG_POSITION(operation, 0x2AC);
+ASSERT_REG_POSITION(pattern_offset, 0x2B0);
+ASSERT_REG_POSITION(pattern_select, 0x2B4);
+ASSERT_REG_POSITION(monochrome_pattern, 0x2E8);
+ASSERT_REG_POSITION(color_pattern, 0x300);
+ASSERT_REG_POSITION(render_solid, 0x580);
+ASSERT_REG_POSITION(pixels_from_cpu, 0x800);
+ASSERT_REG_POSITION(big_endian_control, 0x870);
+ASSERT_REG_POSITION(pixels_from_memory, 0x880);
 
 #undef ASSERT_REG_POSITION
 
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 898370739..ba387506e 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -58,24 +58,6 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun
     }
 }
 
-Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const {
-    const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value();
-    ASSERT(cbuf_mask[regs.tex_cb_index]);
-
-    const auto& texinfo = launch_description.const_buffer_config[regs.tex_cb_index];
-    ASSERT(texinfo.Address() != 0);
-
-    const GPUVAddr address = texinfo.Address() + offset * sizeof(Texture::TextureHandle);
-    ASSERT(address < texinfo.Address() + texinfo.size);
-
-    const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)};
-    return GetTextureInfo(tex_handle);
-}
-
-Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const {
-    return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
-}
-
 u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
     ASSERT(stage == ShaderType::Compute);
     const auto& buffer = launch_description.const_buffer_config[const_buffer];
@@ -98,9 +80,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
 
 SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
     const Texture::TextureHandle tex_handle{handle};
-    const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
-    SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
-    result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
+    const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
+    const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
+
+    SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
+    result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
     return result;
 }
 
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 7f2500aab..51a041202 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -209,11 +209,6 @@ public:
     void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
                          u32 methods_pending) override;
 
-    Texture::FullTextureInfo GetTexture(std::size_t offset) const;
-
-    /// Given a texture handle, returns the TSC and TIC entries.
-    Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
-
     u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
 
     SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 761962ed0..9be651e24 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -2,7 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <cinttypes>
 #include <cstring>
 #include <optional>
 #include "common/assert.h"
@@ -227,6 +226,10 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
             OnMemoryWrite();
         }
         return;
+    case MAXWELL3D_REG_INDEX(fragment_barrier):
+        return rasterizer->FragmentBarrier();
+    case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
+        return rasterizer->TiledCacheBarrier();
     }
 }
 
@@ -639,7 +642,7 @@ void Maxwell3D::FinishCBData() {
 }
 
 Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
-    const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
+    const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)};
 
     Texture::TICEntry tic_entry;
     memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
@@ -648,43 +651,19 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
648} 651}
649 652
650Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { 653Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
651 const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)}; 654 const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)};
652 655
653 Texture::TSCEntry tsc_entry; 656 Texture::TSCEntry tsc_entry;
654 memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); 657 memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
655 return tsc_entry; 658 return tsc_entry;
656} 659}
657 660
658Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const {
659 return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
660}
661
662Texture::FullTextureInfo Maxwell3D::GetStageTexture(ShaderType stage, std::size_t offset) const {
663 const auto stage_index = static_cast<std::size_t>(stage);
664 const auto& shader = state.shader_stages[stage_index];
665 const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
666 ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
667
668 const GPUVAddr tex_info_address =
669 tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
670
671 ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
672
673 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
674
675 return GetTextureInfo(tex_handle);
676}
677
678u32 Maxwell3D::GetRegisterValue(u32 method) const { 661u32 Maxwell3D::GetRegisterValue(u32 method) const {
679 ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); 662 ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
680 return regs.reg_array[method]; 663 return regs.reg_array[method];
681} 664}
682 665
683void Maxwell3D::ProcessClearBuffers() { 666void Maxwell3D::ProcessClearBuffers() {
684 ASSERT(regs.clear_buffers.R == regs.clear_buffers.G &&
685 regs.clear_buffers.R == regs.clear_buffers.B &&
686 regs.clear_buffers.R == regs.clear_buffers.A);
687
688 rasterizer->Clear(); 667 rasterizer->Clear();
689} 668}
690 669
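
ProcessClearBuffers used to assert that all four clear-channel flags agreed; dropping the assert lets clears with a partial channel mask reach the rasterizer. One plausible host-side handling, shown with OpenGL calls purely for illustration:

    // Sketch: honor per-channel clear flags instead of asserting they are equal.
    glColorMask(regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
                regs.clear_buffers.B != 0, regs.clear_buffers.A != 0);
    glClear(GL_COLOR_BUFFER_BIT);
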
@@ -692,9 +671,7 @@ u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offse
692 ASSERT(stage != ShaderType::Compute); 671 ASSERT(stage != ShaderType::Compute);
693 const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; 672 const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
694 const auto& buffer = shader_stage.const_buffers[const_buffer]; 673 const auto& buffer = shader_stage.const_buffers[const_buffer];
695 u32 result; 674 return memory_manager.Read<u32>(buffer.address + offset);
696 std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32));
697 return result;
698} 675}
699 676
700SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { 677SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
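
The old AccessConstBuffer32 dereferenced GetPointer and memcpy'd out of it, which goes wrong when the address is not backed by host memory; routing the access through MemoryManager::Read<u32> keeps translation and validation in one place. A sketch of what Read<T> is assumed to do (the real implementation may validate the mapping differently):

    template <typename T>
    T MemoryManager::Read(GPUVAddr addr) const {
        T value{};
        ReadBlockUnsafe(addr, &value, sizeof(T));
        return value;
    }
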
@@ -712,9 +689,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
712 689
713SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { 690SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
714 const Texture::TextureHandle tex_handle{handle}; 691 const Texture::TextureHandle tex_handle{handle};
715 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); 692 const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
716 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); 693 const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
717 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); 694
695 SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
696 result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
718 return result; 697 return result;
719} 698}
720 699
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 564acbc53..bf9e07c9b 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -438,16 +438,6 @@ public:
438 DecrWrapOGL = 0x8508, 438 DecrWrapOGL = 0x8508,
439 }; 439 };
440 440
441 enum class MemoryLayout : u32 {
442 Linear = 0,
443 BlockLinear = 1,
444 };
445
446 enum class InvMemoryLayout : u32 {
447 BlockLinear = 0,
448 Linear = 1,
449 };
450
451 enum class CounterReset : u32 { 441 enum class CounterReset : u32 {
452 SampleCnt = 0x01, 442 SampleCnt = 0x01,
453 Unk02 = 0x02, 443 Unk02 = 0x02,
@@ -589,21 +579,31 @@ public:
589 NegativeW = 7, 579 NegativeW = 7,
590 }; 580 };
591 581
582 enum class SamplerIndex : u32 {
583 Independently = 0,
584 ViaHeaderIndex = 1,
585 };
586
587 struct TileMode {
588 union {
589 BitField<0, 4, u32> block_width;
590 BitField<4, 4, u32> block_height;
591 BitField<8, 4, u32> block_depth;
592 BitField<12, 1, u32> is_pitch_linear;
593 BitField<16, 1, u32> is_3d;
594 };
595 };
596 static_assert(sizeof(TileMode) == 4);
597
592 struct RenderTargetConfig { 598 struct RenderTargetConfig {
593 u32 address_high; 599 u32 address_high;
594 u32 address_low; 600 u32 address_low;
595 u32 width; 601 u32 width;
596 u32 height; 602 u32 height;
597 Tegra::RenderTargetFormat format; 603 Tegra::RenderTargetFormat format;
604 TileMode tile_mode;
598 union { 605 union {
599 BitField<0, 3, u32> block_width; 606 BitField<0, 16, u32> depth;
600 BitField<4, 3, u32> block_height;
601 BitField<8, 3, u32> block_depth;
602 BitField<12, 1, InvMemoryLayout> type;
603 BitField<16, 1, u32> is_3d;
604 } memory_layout;
605 union {
606 BitField<0, 16, u32> layers;
607 BitField<16, 1, u32> volume; 607 BitField<16, 1, u32> volume;
608 }; 608 };
609 u32 layer_stride; 609 u32 layer_stride;
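
TileMode collapses the per-surface memory_layout unions and the Linear/BlockLinear and inverted InvMemoryLayout enums into one struct: the block dimension fields grow from 3 to 4 bits and the pitch-linear flag is stored directly. Assuming the usual Tegra block-linear convention that each field holds the log2 of a GOB count, decoding looks like:

    const u32 gobs_in_y = 1U << rt.tile_mode.block_height; // GOBs per block, vertically
    const u32 gobs_in_z = 1U << rt.tile_mode.block_depth;  // GOBs per block, in depth
    const bool is_linear = rt.tile_mode.is_pitch_linear != 0;
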
@@ -832,7 +832,11 @@ public:
832 832
833 u32 patch_vertices; 833 u32 patch_vertices;
834 834
835 INSERT_UNION_PADDING_WORDS(0xC); 835 INSERT_UNION_PADDING_WORDS(0x4);
836
837 u32 fragment_barrier;
838
839 INSERT_UNION_PADDING_WORDS(0x7);
836 840
837 std::array<ScissorTest, NumViewports> scissor_test; 841 std::array<ScissorTest, NumViewports> scissor_test;
838 842
@@ -842,7 +846,15 @@ public:
842 u32 stencil_back_mask; 846 u32 stencil_back_mask;
843 u32 stencil_back_func_mask; 847 u32 stencil_back_func_mask;
844 848
845 INSERT_UNION_PADDING_WORDS(0xC); 849 INSERT_UNION_PADDING_WORDS(0x5);
850
851 u32 invalidate_texture_data_cache;
852
853 INSERT_UNION_PADDING_WORDS(0x1);
854
855 u32 tiled_cache_barrier;
856
857 INSERT_UNION_PADDING_WORDS(0x4);
846 858
847 u32 color_mask_common; 859 u32 color_mask_common;
848 860
@@ -866,12 +878,7 @@ public:
866 u32 address_high; 878 u32 address_high;
867 u32 address_low; 879 u32 address_low;
868 Tegra::DepthFormat format; 880 Tegra::DepthFormat format;
869 union { 881 TileMode tile_mode;
870 BitField<0, 4, u32> block_width;
871 BitField<4, 4, u32> block_height;
872 BitField<8, 4, u32> block_depth;
873 BitField<20, 1, InvMemoryLayout> type;
874 } memory_layout;
875 u32 layer_stride; 882 u32 layer_stride;
876 883
877 GPUVAddr Address() const { 884 GPUVAddr Address() const {
@@ -880,7 +887,18 @@ public:
880 } 887 }
881 } zeta; 888 } zeta;
882 889
883 INSERT_UNION_PADDING_WORDS(0x41); 890 struct {
891 union {
892 BitField<0, 16, u32> x;
893 BitField<16, 16, u32> width;
894 };
895 union {
896 BitField<0, 16, u32> y;
897 BitField<16, 16, u32> height;
898 };
899 } render_area;
900
901 INSERT_UNION_PADDING_WORDS(0x3F);
884 902
885 union { 903 union {
886 BitField<0, 4, u32> stencil; 904 BitField<0, 4, u32> stencil;
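
The new render_area block packs an origin and an extent into two words. Reading it back is a pair of bitfield extractions; the variable names below are illustrative:

    const u32 x = regs.render_area.x;            // bits 0-15 of the first word
    const u32 width = regs.render_area.width;    // bits 16-31 of the first word
    const u32 y = regs.render_area.y;            // bits 0-15 of the second word
    const u32 height = regs.render_area.height;  // bits 16-31 of the second word
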
@@ -921,7 +939,7 @@ public:
921 BitField<25, 3, u32> map_7; 939 BitField<25, 3, u32> map_7;
922 }; 940 };
923 941
924 u32 GetMap(std::size_t index) const { 942 u32 Map(std::size_t index) const {
925 const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3, 943 const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3,
926 map_4, map_5, map_6, map_7}; 944 map_4, map_5, map_6, map_7};
927 ASSERT(index < maps.size()); 945 ASSERT(index < maps.size());
@@ -934,11 +952,13 @@ public:
934 u32 zeta_width; 952 u32 zeta_width;
935 u32 zeta_height; 953 u32 zeta_height;
936 union { 954 union {
937 BitField<0, 16, u32> zeta_layers; 955 BitField<0, 16, u32> zeta_depth;
938 BitField<16, 1, u32> zeta_volume; 956 BitField<16, 1, u32> zeta_volume;
939 }; 957 };
940 958
941 INSERT_UNION_PADDING_WORDS(0x26); 959 SamplerIndex sampler_index;
960
961 INSERT_UNION_PADDING_WORDS(0x25);
942 962
943 u32 depth_test_enable; 963 u32 depth_test_enable;
944 964
@@ -964,6 +984,7 @@ public:
964 float b; 984 float b;
965 float a; 985 float a;
966 } blend_color; 986 } blend_color;
987
967 INSERT_UNION_PADDING_WORDS(0x4); 988 INSERT_UNION_PADDING_WORDS(0x4);
968 989
969 struct { 990 struct {
@@ -1001,7 +1022,12 @@ public:
1001 float line_width_smooth; 1022 float line_width_smooth;
1002 float line_width_aliased; 1023 float line_width_aliased;
1003 1024
1004 INSERT_UNION_PADDING_WORDS(0x1F); 1025 INSERT_UNION_PADDING_WORDS(0x1B);
1026
1027 u32 invalidate_sampler_cache_no_wfi;
1028 u32 invalidate_texture_header_cache_no_wfi;
1029
1030 INSERT_UNION_PADDING_WORDS(0x2);
1005 1031
1006 u32 vb_element_base; 1032 u32 vb_element_base;
1007 u32 vb_base_instance; 1033 u32 vb_base_instance;
@@ -1045,13 +1071,13 @@ public:
1045 } condition; 1071 } condition;
1046 1072
1047 struct { 1073 struct {
1048 u32 tsc_address_high; 1074 u32 address_high;
1049 u32 tsc_address_low; 1075 u32 address_low;
1050 u32 tsc_limit; 1076 u32 limit;
1051 1077
1052 GPUVAddr TSCAddress() const { 1078 GPUVAddr Address() const {
1053 return static_cast<GPUVAddr>( 1079 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
1054 (static_cast<GPUVAddr>(tsc_address_high) << 32) | tsc_address_low); 1080 address_low);
1055 } 1081 }
1056 } tsc; 1082 } tsc;
1057 1083
@@ -1062,13 +1088,13 @@ public:
1062 u32 line_smooth_enable; 1088 u32 line_smooth_enable;
1063 1089
1064 struct { 1090 struct {
1065 u32 tic_address_high; 1091 u32 address_high;
1066 u32 tic_address_low; 1092 u32 address_low;
1067 u32 tic_limit; 1093 u32 limit;
1068 1094
1069 GPUVAddr TICAddress() const { 1095 GPUVAddr Address() const {
1070 return static_cast<GPUVAddr>( 1096 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
1071 (static_cast<GPUVAddr>(tic_address_high) << 32) | tic_address_low); 1097 address_low);
1072 } 1098 }
1073 } tic; 1099 } tic;
1074 1100
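
Renaming the tsc and tic members to plain address_high/address_low/limit gives both descriptor-table register blocks an identical shape, so one helper can service either table. A hypothetical generic reader enabled by the uniform layout (the bounds check is an assumption, not taken from this diff):

    template <typename Entry, typename Table>
    Entry ReadDescriptor(Tegra::MemoryManager& memory_manager, const Table& table, u32 index) {
        ASSERT(index <= table.limit); // hypothetical sanity check
        Entry entry;
        memory_manager.ReadBlockUnsafe(table.Address() + index * sizeof(Entry), &entry,
                                       sizeof(entry));
        return entry;
    }
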
@@ -1397,12 +1423,6 @@ public:
1397 1423
1398 void FlushMMEInlineDraw(); 1424 void FlushMMEInlineDraw();
1399 1425
1400 /// Given a texture handle, returns the TSC and TIC entries.
1401 Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
1402
1403 /// Returns the texture information for a specific texture in a specific shader stage.
1404 Texture::FullTextureInfo GetStageTexture(ShaderType stage, std::size_t offset) const;
1405
1406 u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; 1426 u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
1407 1427
1408 SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; 1428 SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
@@ -1598,10 +1618,13 @@ ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
1598ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371); 1618ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
1599ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372); 1619ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
1600ASSERT_REG_POSITION(patch_vertices, 0x373); 1620ASSERT_REG_POSITION(patch_vertices, 0x373);
1621ASSERT_REG_POSITION(fragment_barrier, 0x378);
1601ASSERT_REG_POSITION(scissor_test, 0x380); 1622ASSERT_REG_POSITION(scissor_test, 0x380);
1602ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); 1623ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
1603ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); 1624ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
1604ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); 1625ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
1626ASSERT_REG_POSITION(invalidate_texture_data_cache, 0x3DD);
1627ASSERT_REG_POSITION(tiled_cache_barrier, 0x3DF);
1605ASSERT_REG_POSITION(color_mask_common, 0x3E4); 1628ASSERT_REG_POSITION(color_mask_common, 0x3E4);
1606ASSERT_REG_POSITION(depth_bounds, 0x3E7); 1629ASSERT_REG_POSITION(depth_bounds, 0x3E7);
1607ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); 1630ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
@@ -1609,6 +1632,7 @@ ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED);
1609ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE); 1632ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE);
1610ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF); 1633ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF);
1611ASSERT_REG_POSITION(zeta, 0x3F8); 1634ASSERT_REG_POSITION(zeta, 0x3F8);
1635ASSERT_REG_POSITION(render_area, 0x3FD);
1612ASSERT_REG_POSITION(clear_flags, 0x43E); 1636ASSERT_REG_POSITION(clear_flags, 0x43E);
1613ASSERT_REG_POSITION(fill_rectangle, 0x44F); 1637ASSERT_REG_POSITION(fill_rectangle, 0x44F);
1614ASSERT_REG_POSITION(vertex_attrib_format, 0x458); 1638ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
@@ -1617,7 +1641,8 @@ ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);
1617ASSERT_REG_POSITION(rt_control, 0x487); 1641ASSERT_REG_POSITION(rt_control, 0x487);
1618ASSERT_REG_POSITION(zeta_width, 0x48a); 1642ASSERT_REG_POSITION(zeta_width, 0x48a);
1619ASSERT_REG_POSITION(zeta_height, 0x48b); 1643ASSERT_REG_POSITION(zeta_height, 0x48b);
1620ASSERT_REG_POSITION(zeta_layers, 0x48c); 1644ASSERT_REG_POSITION(zeta_depth, 0x48c);
1645ASSERT_REG_POSITION(sampler_index, 0x48D);
1621ASSERT_REG_POSITION(depth_test_enable, 0x4B3); 1646ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
1622ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); 1647ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
1623ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); 1648ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
@@ -1641,6 +1666,8 @@ ASSERT_REG_POSITION(frag_color_clamp, 0x4EA);
1641ASSERT_REG_POSITION(screen_y_control, 0x4EB); 1666ASSERT_REG_POSITION(screen_y_control, 0x4EB);
1642ASSERT_REG_POSITION(line_width_smooth, 0x4EC); 1667ASSERT_REG_POSITION(line_width_smooth, 0x4EC);
1643ASSERT_REG_POSITION(line_width_aliased, 0x4ED); 1668ASSERT_REG_POSITION(line_width_aliased, 0x4ED);
1669ASSERT_REG_POSITION(invalidate_sampler_cache_no_wfi, 0x509);
1670ASSERT_REG_POSITION(invalidate_texture_header_cache_no_wfi, 0x50A);
1644ASSERT_REG_POSITION(vb_element_base, 0x50D); 1671ASSERT_REG_POSITION(vb_element_base, 0x50D);
1645ASSERT_REG_POSITION(vb_base_instance, 0x50E); 1672ASSERT_REG_POSITION(vb_base_instance, 0x50E);
1646ASSERT_REG_POSITION(clip_distance_enabled, 0x544); 1673ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
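
Each ASSERT_REG_POSITION line pins a Regs field to its hardware method offset, so any miscounted padding in the structure edits above fails at compile time rather than at run time. The macro is assumed to be a static_assert of this shape:

    // Assumed definition: registers are 4 bytes wide, so the field's byte
    // offset must equal the method index times 4.
    #define ASSERT_REG_POSITION(field_name, position) \
        static_assert(offsetof(Maxwell3D::Regs, field_name) == position * 4, \
                      "Field " #field_name " has invalid position")
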
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 1c29e895e..ba750748c 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -96,6 +96,7 @@ void MaxwellDMA::CopyPitchToPitch() {
96} 96}
97 97
98void MaxwellDMA::CopyBlockLinearToPitch() { 98void MaxwellDMA::CopyBlockLinearToPitch() {
99 UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
99 UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0); 100 UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0);
100 UNIMPLEMENTED_IF(regs.src_params.layer != 0); 101 UNIMPLEMENTED_IF(regs.src_params.layer != 0);
101 102
@@ -135,6 +136,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
135} 136}
136 137
137void MaxwellDMA::CopyPitchToBlockLinear() { 138void MaxwellDMA::CopyPitchToBlockLinear() {
139 UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one");
140
138 const auto& dst_params = regs.dst_params; 141 const auto& dst_params = regs.dst_params;
139 const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; 142 const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in;
140 const u32 width = dst_params.width; 143 const u32 width = dst_params.width;
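
The block_size fields are log2 GOB counts, so 0 already means a single GOB; the new UNIMPLEMENTED_IF guards make copies with a non-default block width fail loudly instead of being silently mis-addressed. Illustrative decode under that encoding (the height field name is assumed from its width/depth siblings):

    const u32 block_width_gobs = 1U << regs.dst_params.block_size.width;
    const u32 block_height_gobs = 1U << regs.dst_params.block_size.height;
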
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index c5f26896e..3512283ff 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -9,6 +9,7 @@
9 9
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "core/core.h" 11#include "core/core.h"
12#include "video_core/delayed_destruction_ring.h"
12#include "video_core/gpu.h" 13#include "video_core/gpu.h"
13#include "video_core/memory_manager.h" 14#include "video_core/memory_manager.h"
14#include "video_core/rasterizer_interface.h" 15#include "video_core/rasterizer_interface.h"
@@ -47,6 +48,11 @@ protected:
47template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache> 48template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache>
48class FenceManager { 49class FenceManager {
49public: 50public:
51 /// Notify the fence manager about a new frame
52 void TickFrame() {
53 delayed_destruction_ring.Tick();
54 }
55
50 void SignalSemaphore(GPUVAddr addr, u32 value) { 56 void SignalSemaphore(GPUVAddr addr, u32 value) {
51 TryReleasePendingFences(); 57 TryReleasePendingFences();
52 const bool should_flush = ShouldFlush(); 58 const bool should_flush = ShouldFlush();
@@ -86,7 +92,7 @@ public:
86 } else { 92 } else {
87 gpu.IncrementSyncPoint(current_fence->GetPayload()); 93 gpu.IncrementSyncPoint(current_fence->GetPayload());
88 } 94 }
89 fences.pop(); 95 PopFence();
90 } 96 }
91 } 97 }
92 98
@@ -132,7 +138,7 @@ private:
132 } else { 138 } else {
133 gpu.IncrementSyncPoint(current_fence->GetPayload()); 139 gpu.IncrementSyncPoint(current_fence->GetPayload());
134 } 140 }
135 fences.pop(); 141 PopFence();
136 } 142 }
137 } 143 }
138 144
@@ -158,7 +164,14 @@ private:
158 query_cache.CommitAsyncFlushes(); 164 query_cache.CommitAsyncFlushes();
159 } 165 }
160 166
167 void PopFence() {
168 delayed_destruction_ring.Push(std::move(fences.front()));
169 fences.pop();
170 }
171
161 std::queue<TFence> fences; 172 std::queue<TFence> fences;
173
174 DelayedDestructionRing<TFence, 6> delayed_destruction_ring;
162}; 175};
163 176
164} // namespace VideoCommon 177} // namespace VideoCommon
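
PopFence no longer destroys a fence on the spot: it parks it in the delayed destruction ring, and TickFrame, called once per frame, recycles the slot that has survived six ticks. A minimal sketch of the ring, assuming the abstraction from the parent commit looks roughly like this:

    #include <array>
    #include <cstddef>
    #include <utility>
    #include <vector>

    // Objects pushed during one tick are destroyed TICKS_TO_DESTROY ticks
    // later, when their slot comes around again and is cleared.
    template <typename T, size_t TICKS_TO_DESTROY>
    class DelayedDestructionRing {
    public:
        void Tick() {
            index = (index + 1) % TICKS_TO_DESTROY;
            elements[index].clear();
        }
        void Push(T&& object) {
            elements[index].push_back(std::move(object));
        }
    private:
        size_t index = 0;
        std::array<std::vector<T>, TICKS_TO_DESTROY> elements;
    };
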
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 6e70bd362..65feff588 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -57,7 +57,10 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
57 } 57 }
58 58
59 // Flush and invalidate through the GPU interface, to be asynchronous if possible. 59 // Flush and invalidate through the GPU interface, to be asynchronous if possible.
60 system.GPU().FlushAndInvalidateRegion(*GpuToCpuAddress(gpu_addr), size); 60 const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr);
61 ASSERT(cpu_addr);
62
63 rasterizer->UnmapMemory(*cpu_addr, size);
61 64
62 UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); 65 UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
63} 66}
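
Unmap now asserts that the GPU address translates and notifies the rasterizer through the new UnmapMemory hook instead of issuing a blanket FlushAndInvalidateRegion. One plausible receiver, with cache member names assumed for illustration:

    // Hypothetical override: drop stale entries over the unmapped CPU range
    // rather than flushing them back to guest memory.
    void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
        texture_cache.UnmapMemory(addr, size);
        buffer_cache.OnCPUWrite(addr, size);
        shader_cache.OnCPUWrite(addr, size);
    }
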
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index 9da9fb4ff..e69de29bb 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -1,250 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <cstring>
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "video_core/morton.h"
10#include "video_core/surface.h"
11#include "video_core/textures/decoders.h"
12
13namespace VideoCore {
14
15using Surface::GetBytesPerPixel;
16using Surface::PixelFormat;
17
18using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*);
19using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
20
21template <bool morton_to_linear, PixelFormat format>
22static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
23 u32 tile_width_spacing, u8* buffer, u8* addr) {
24 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
25
26 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
27 // pixel values.
28 constexpr u32 tile_size_x{GetDefaultBlockWidth(format)};
29 constexpr u32 tile_size_y{GetDefaultBlockHeight(format)};
30
31 if constexpr (morton_to_linear) {
32 Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
33 stride, height, depth, block_height, block_depth,
34 tile_width_spacing);
35 } else {
36 Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
37 (height + tile_size_y - 1) / tile_size_y, depth,
38 bytes_per_pixel, bytes_per_pixel, addr, buffer, false,
39 block_height, block_depth, tile_width_spacing);
40 }
41}
42
43static constexpr ConversionArray morton_to_linear_fns = {
44 MortonCopy<true, PixelFormat::A8B8G8R8_UNORM>,
45 MortonCopy<true, PixelFormat::A8B8G8R8_SNORM>,
46 MortonCopy<true, PixelFormat::A8B8G8R8_SINT>,
47 MortonCopy<true, PixelFormat::A8B8G8R8_UINT>,
48 MortonCopy<true, PixelFormat::R5G6B5_UNORM>,
49 MortonCopy<true, PixelFormat::B5G6R5_UNORM>,
50 MortonCopy<true, PixelFormat::A1R5G5B5_UNORM>,
51 MortonCopy<true, PixelFormat::A2B10G10R10_UNORM>,
52 MortonCopy<true, PixelFormat::A2B10G10R10_UINT>,
53 MortonCopy<true, PixelFormat::A1B5G5R5_UNORM>,
54 MortonCopy<true, PixelFormat::R8_UNORM>,
55 MortonCopy<true, PixelFormat::R8_SNORM>,
56 MortonCopy<true, PixelFormat::R8_SINT>,
57 MortonCopy<true, PixelFormat::R8_UINT>,
58 MortonCopy<true, PixelFormat::R16G16B16A16_FLOAT>,
59 MortonCopy<true, PixelFormat::R16G16B16A16_UNORM>,
60 MortonCopy<true, PixelFormat::R16G16B16A16_SNORM>,
61 MortonCopy<true, PixelFormat::R16G16B16A16_SINT>,
62 MortonCopy<true, PixelFormat::R16G16B16A16_UINT>,
63 MortonCopy<true, PixelFormat::B10G11R11_FLOAT>,
64 MortonCopy<true, PixelFormat::R32G32B32A32_UINT>,
65 MortonCopy<true, PixelFormat::BC1_RGBA_UNORM>,
66 MortonCopy<true, PixelFormat::BC2_UNORM>,
67 MortonCopy<true, PixelFormat::BC3_UNORM>,
68 MortonCopy<true, PixelFormat::BC4_UNORM>,
69 MortonCopy<true, PixelFormat::BC4_SNORM>,
70 MortonCopy<true, PixelFormat::BC5_UNORM>,
71 MortonCopy<true, PixelFormat::BC5_SNORM>,
72 MortonCopy<true, PixelFormat::BC7_UNORM>,
73 MortonCopy<true, PixelFormat::BC6H_UFLOAT>,
74 MortonCopy<true, PixelFormat::BC6H_SFLOAT>,
75 MortonCopy<true, PixelFormat::ASTC_2D_4X4_UNORM>,
76 MortonCopy<true, PixelFormat::B8G8R8A8_UNORM>,
77 MortonCopy<true, PixelFormat::R32G32B32A32_FLOAT>,
78 MortonCopy<true, PixelFormat::R32G32B32A32_SINT>,
79 MortonCopy<true, PixelFormat::R32G32_FLOAT>,
80 MortonCopy<true, PixelFormat::R32G32_SINT>,
81 MortonCopy<true, PixelFormat::R32_FLOAT>,
82 MortonCopy<true, PixelFormat::R16_FLOAT>,
83 MortonCopy<true, PixelFormat::R16_UNORM>,
84 MortonCopy<true, PixelFormat::R16_SNORM>,
85 MortonCopy<true, PixelFormat::R16_UINT>,
86 MortonCopy<true, PixelFormat::R16_SINT>,
87 MortonCopy<true, PixelFormat::R16G16_UNORM>,
88 MortonCopy<true, PixelFormat::R16G16_FLOAT>,
89 MortonCopy<true, PixelFormat::R16G16_UINT>,
90 MortonCopy<true, PixelFormat::R16G16_SINT>,
91 MortonCopy<true, PixelFormat::R16G16_SNORM>,
92 MortonCopy<true, PixelFormat::R32G32B32_FLOAT>,
93 MortonCopy<true, PixelFormat::A8B8G8R8_SRGB>,
94 MortonCopy<true, PixelFormat::R8G8_UNORM>,
95 MortonCopy<true, PixelFormat::R8G8_SNORM>,
96 MortonCopy<true, PixelFormat::R8G8_SINT>,
97 MortonCopy<true, PixelFormat::R8G8_UINT>,
98 MortonCopy<true, PixelFormat::R32G32_UINT>,
99 MortonCopy<true, PixelFormat::R16G16B16X16_FLOAT>,
100 MortonCopy<true, PixelFormat::R32_UINT>,
101 MortonCopy<true, PixelFormat::R32_SINT>,
102 MortonCopy<true, PixelFormat::ASTC_2D_8X8_UNORM>,
103 MortonCopy<true, PixelFormat::ASTC_2D_8X5_UNORM>,
104 MortonCopy<true, PixelFormat::ASTC_2D_5X4_UNORM>,
105 MortonCopy<true, PixelFormat::B8G8R8A8_SRGB>,
106 MortonCopy<true, PixelFormat::BC1_RGBA_SRGB>,
107 MortonCopy<true, PixelFormat::BC2_SRGB>,
108 MortonCopy<true, PixelFormat::BC3_SRGB>,
109 MortonCopy<true, PixelFormat::BC7_SRGB>,
110 MortonCopy<true, PixelFormat::A4B4G4R4_UNORM>,
111 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
112 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
113 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
114 MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
115 MortonCopy<true, PixelFormat::ASTC_2D_5X5_UNORM>,
116 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
117 MortonCopy<true, PixelFormat::ASTC_2D_10X8_UNORM>,
118 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
119 MortonCopy<true, PixelFormat::ASTC_2D_6X6_UNORM>,
120 MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>,
121 MortonCopy<true, PixelFormat::ASTC_2D_10X10_UNORM>,
122 MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>,
123 MortonCopy<true, PixelFormat::ASTC_2D_12X12_UNORM>,
124 MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>,
125 MortonCopy<true, PixelFormat::ASTC_2D_8X6_UNORM>,
126 MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>,
127 MortonCopy<true, PixelFormat::ASTC_2D_6X5_UNORM>,
128 MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>,
129 MortonCopy<true, PixelFormat::E5B9G9R9_FLOAT>,
130 MortonCopy<true, PixelFormat::D32_FLOAT>,
131 MortonCopy<true, PixelFormat::D16_UNORM>,
132 MortonCopy<true, PixelFormat::D24_UNORM_S8_UINT>,
133 MortonCopy<true, PixelFormat::S8_UINT_D24_UNORM>,
134 MortonCopy<true, PixelFormat::D32_FLOAT_S8_UINT>,
135};
136
137static constexpr ConversionArray linear_to_morton_fns = {
138 MortonCopy<false, PixelFormat::A8B8G8R8_UNORM>,
139 MortonCopy<false, PixelFormat::A8B8G8R8_SNORM>,
140 MortonCopy<false, PixelFormat::A8B8G8R8_SINT>,
141 MortonCopy<false, PixelFormat::A8B8G8R8_UINT>,
142 MortonCopy<false, PixelFormat::R5G6B5_UNORM>,
143 MortonCopy<false, PixelFormat::B5G6R5_UNORM>,
144 MortonCopy<false, PixelFormat::A1R5G5B5_UNORM>,
145 MortonCopy<false, PixelFormat::A2B10G10R10_UNORM>,
146 MortonCopy<false, PixelFormat::A2B10G10R10_UINT>,
147 MortonCopy<false, PixelFormat::A1B5G5R5_UNORM>,
148 MortonCopy<false, PixelFormat::R8_UNORM>,
149 MortonCopy<false, PixelFormat::R8_SNORM>,
150 MortonCopy<false, PixelFormat::R8_SINT>,
151 MortonCopy<false, PixelFormat::R8_UINT>,
152 MortonCopy<false, PixelFormat::R16G16B16A16_FLOAT>,
153 MortonCopy<false, PixelFormat::R16G16B16A16_SNORM>,
154 MortonCopy<false, PixelFormat::R16G16B16A16_SINT>,
155 MortonCopy<false, PixelFormat::R16G16B16A16_UNORM>,
156 MortonCopy<false, PixelFormat::R16G16B16A16_UINT>,
157 MortonCopy<false, PixelFormat::B10G11R11_FLOAT>,
158 MortonCopy<false, PixelFormat::R32G32B32A32_UINT>,
159 MortonCopy<false, PixelFormat::BC1_RGBA_UNORM>,
160 MortonCopy<false, PixelFormat::BC2_UNORM>,
161 MortonCopy<false, PixelFormat::BC3_UNORM>,
162 MortonCopy<false, PixelFormat::BC4_UNORM>,
163 MortonCopy<false, PixelFormat::BC4_SNORM>,
164 MortonCopy<false, PixelFormat::BC5_UNORM>,
165 MortonCopy<false, PixelFormat::BC5_SNORM>,
166 MortonCopy<false, PixelFormat::BC7_UNORM>,
167 MortonCopy<false, PixelFormat::BC6H_UFLOAT>,
168 MortonCopy<false, PixelFormat::BC6H_SFLOAT>,
 169 // TODO(Subv): Swizzling ASTC formats is not supported
170 nullptr,
171 MortonCopy<false, PixelFormat::B8G8R8A8_UNORM>,
172 MortonCopy<false, PixelFormat::R32G32B32A32_FLOAT>,
173 MortonCopy<false, PixelFormat::R32G32B32A32_SINT>,
174 MortonCopy<false, PixelFormat::R32G32_FLOAT>,
175 MortonCopy<false, PixelFormat::R32G32_SINT>,
176 MortonCopy<false, PixelFormat::R32_FLOAT>,
177 MortonCopy<false, PixelFormat::R16_FLOAT>,
178 MortonCopy<false, PixelFormat::R16_UNORM>,
179 MortonCopy<false, PixelFormat::R16_SNORM>,
180 MortonCopy<false, PixelFormat::R16_UINT>,
181 MortonCopy<false, PixelFormat::R16_SINT>,
182 MortonCopy<false, PixelFormat::R16G16_UNORM>,
183 MortonCopy<false, PixelFormat::R16G16_FLOAT>,
184 MortonCopy<false, PixelFormat::R16G16_UINT>,
185 MortonCopy<false, PixelFormat::R16G16_SINT>,
186 MortonCopy<false, PixelFormat::R16G16_SNORM>,
187 MortonCopy<false, PixelFormat::R32G32B32_FLOAT>,
188 MortonCopy<false, PixelFormat::A8B8G8R8_SRGB>,
189 MortonCopy<false, PixelFormat::R8G8_UNORM>,
190 MortonCopy<false, PixelFormat::R8G8_SNORM>,
191 MortonCopy<false, PixelFormat::R8G8_SINT>,
192 MortonCopy<false, PixelFormat::R8G8_UINT>,
193 MortonCopy<false, PixelFormat::R32G32_UINT>,
194 MortonCopy<false, PixelFormat::R16G16B16X16_FLOAT>,
195 MortonCopy<false, PixelFormat::R32_UINT>,
196 MortonCopy<false, PixelFormat::R32_SINT>,
197 nullptr,
198 nullptr,
199 nullptr,
200 MortonCopy<false, PixelFormat::B8G8R8A8_SRGB>,
201 MortonCopy<false, PixelFormat::BC1_RGBA_SRGB>,
202 MortonCopy<false, PixelFormat::BC2_SRGB>,
203 MortonCopy<false, PixelFormat::BC3_SRGB>,
204 MortonCopy<false, PixelFormat::BC7_SRGB>,
205 MortonCopy<false, PixelFormat::A4B4G4R4_UNORM>,
206 nullptr,
207 nullptr,
208 nullptr,
209 nullptr,
210 nullptr,
211 nullptr,
212 nullptr,
213 nullptr,
214 nullptr,
215 nullptr,
216 nullptr,
217 nullptr,
218 nullptr,
219 nullptr,
220 nullptr,
221 nullptr,
222 nullptr,
223 nullptr,
224 MortonCopy<false, PixelFormat::E5B9G9R9_FLOAT>,
225 MortonCopy<false, PixelFormat::D32_FLOAT>,
226 MortonCopy<false, PixelFormat::D16_UNORM>,
227 MortonCopy<false, PixelFormat::D24_UNORM_S8_UINT>,
228 MortonCopy<false, PixelFormat::S8_UINT_D24_UNORM>,
229 MortonCopy<false, PixelFormat::D32_FLOAT_S8_UINT>,
230};
231
232static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
233 switch (mode) {
234 case MortonSwizzleMode::MortonToLinear:
235 return morton_to_linear_fns[static_cast<std::size_t>(format)];
236 case MortonSwizzleMode::LinearToMorton:
237 return linear_to_morton_fns[static_cast<std::size_t>(format)];
238 }
239 UNREACHABLE();
240 return morton_to_linear_fns[static_cast<std::size_t>(format)];
241}
242
243void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
244 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
245 u8* buffer, u8* addr) {
246 GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth,
247 tile_width_spacing, buffer, addr);
248}
249
250} // namespace VideoCore
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
index b714a7e3f..e69de29bb 100644
--- a/src/video_core/morton.h
+++ b/src/video_core/morton.h
@@ -1,18 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "video_core/surface.h"
9
10namespace VideoCore {
11
12enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
13
14void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
15 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
16 u8* buffer, u8* addr);
17
18} // namespace VideoCore
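
Both Morton files are emptied rather than deleted outright (their index moves to the empty blob): the per-PixelFormat function-pointer tables, including the nullptr holes for the unsupported ASTC re-swizzle direction, disappear because the rewritten texture cache invokes the routines in textures/decoders directly with runtime parameters. The morton-to-linear path then reduces to a direct call of the shape already visible in the deleted MortonCopy above:

    // Sketch: the table indirection collapses into one runtime-parameterised
    // call (argument order taken from the deleted code above).
    Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
                                     stride, height, depth, block_height, block_depth,
                                     tile_width_spacing);
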
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 27ef4c69a..0cb0f387d 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -76,6 +76,9 @@ public:
76 /// Sync memory between guest and host. 76 /// Sync memory between guest and host.
77 virtual void SyncGuestHost() = 0; 77 virtual void SyncGuestHost() = 0;
78 78
 79 /// Unmap memory range.
80 virtual void UnmapMemory(VAddr addr, u64 size) = 0;
81
79 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 82 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
80 /// and invalidated 83 /// and invalidated
81 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; 84 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
@@ -83,6 +86,12 @@ public:
83 /// Notify the host renderer to wait for previous primitive and compute operations. 86 /// Notify the host renderer to wait for previous primitive and compute operations.
84 virtual void WaitForIdle() = 0; 87 virtual void WaitForIdle() = 0;
85 88
89 /// Notify the host renderer to wait for reads and writes to render targets and flush caches.
90 virtual void FragmentBarrier() = 0;
91
 92 /// Notify the host renderer to make previous render target writes available.
93 virtual void TiledCacheBarrier() = 0;
94
86 /// Notify the rasterizer to send all written commands to the host GPU. 95 /// Notify the rasterizer to send all written commands to the host GPU.
87 virtual void FlushCommands() = 0; 96 virtual void FlushCommands() = 0;
88 97
@@ -91,8 +100,7 @@ public:
91 100
92 /// Attempt to use a faster method to perform a surface copy 101 /// Attempt to use a faster method to perform a surface copy
93 [[nodiscard]] virtual bool AccelerateSurfaceCopy( 102 [[nodiscard]] virtual bool AccelerateSurfaceCopy(
94 const Tegra::Engines::Fermi2D::Regs::Surface& src, 103 const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& dst,
95 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
96 const Tegra::Engines::Fermi2D::Config& copy_config) { 104 const Tegra::Engines::Fermi2D::Config& copy_config) {
97 return false; 105 return false;
98 } 106 }
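
The two new pure virtuals give the engines explicit points at which to order render-target access. One plausible OpenGL-side mapping, not taken from this diff:

    // Sketch: FragmentBarrier orders framebuffer reads/writes; TiledCacheBarrier
    // makes prior attachment writes visible to subsequent draws.
    void RasterizerOpenGL::FragmentBarrier() {
        glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT);
    }

    void RasterizerOpenGL::TiledCacheBarrier() {
        glTextureBarrier();
    }
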
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 60735d502..5772cad87 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -61,10 +61,9 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
61 61
62OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, 62OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
63 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, 63 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
64 const Device& device_, std::size_t stream_size_) 64 const Device& device_, OGLStreamBuffer& stream_buffer_,
65 : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, 65 StateTracker& state_tracker)
66 std::make_unique<OGLStreamBuffer>(device_, stream_size_, true)}, 66 : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {
67 device{device_} {
68 if (!device.HasFastBufferSubData()) { 67 if (!device.HasFastBufferSubData()) {
69 return; 68 return;
70 } 69 }
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 95251e26b..17ee90316 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -22,6 +22,7 @@ namespace OpenGL {
22class Device; 22class Device;
23class OGLStreamBuffer; 23class OGLStreamBuffer;
24class RasterizerOpenGL; 24class RasterizerOpenGL;
25class StateTracker;
25 26
26class Buffer : public VideoCommon::BufferBlock { 27class Buffer : public VideoCommon::BufferBlock {
27public: 28public:
@@ -52,9 +53,10 @@ private:
52using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; 53using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
53class OGLBufferCache final : public GenericBufferCache { 54class OGLBufferCache final : public GenericBufferCache {
54public: 55public:
55 explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, 56 explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
56 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, 57 Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
57 const Device& device_, std::size_t stream_size_); 58 const Device& device, OGLStreamBuffer& stream_buffer,
59 StateTracker& state_tracker);
58 ~OGLBufferCache(); 60 ~OGLBufferCache();
59 61
60 BufferInfo GetEmptyBuffer(std::size_t) override; 62 BufferInfo GetEmptyBuffer(std::size_t) override;
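
The buffer cache no longer constructs and owns its OGLStreamBuffer; the owner creates one and passes it by reference, so its size and lifetime are decided in a single place. Hypothetical wiring at the owner, with member names and STREAM_BUFFER_SIZE assumed (the OGLStreamBuffer constructor arguments mirror the removed make_unique call above):

    RasterizerOpenGL::RasterizerOpenGL(/* ... */)
        : stream_buffer{device, STREAM_BUFFER_SIZE, true},
          buffer_cache{*this, gpu_memory, cpu_memory, device, stream_buffer, state_tracker} {}
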
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index a94e4f72e..b24179d59 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -5,9 +5,11 @@
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <array>
7#include <cstddef> 7#include <cstddef>
8#include <cstdlib>
8#include <cstring> 9#include <cstring>
9#include <limits> 10#include <limits>
10#include <optional> 11#include <optional>
12#include <span>
11#include <vector> 13#include <vector>
12 14
13#include <glad/glad.h> 15#include <glad/glad.h>
@@ -27,27 +29,29 @@ constexpr u32 ReservedUniformBlocks = 1;
27 29
28constexpr u32 NumStages = 5; 30constexpr u32 NumStages = 5;
29 31
30constexpr std::array LimitUBOs = { 32constexpr std::array LIMIT_UBOS = {
31 GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, 33 GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
32 GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, 34 GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
33 GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS}; 35 GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
34 36};
35constexpr std::array LimitSSBOs = { 37constexpr std::array LIMIT_SSBOS = {
36 GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, 38 GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
37 GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, 39 GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
38 GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS}; 40 GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
39 41};
40constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, 42constexpr std::array LIMIT_SAMPLERS = {
41 GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, 43 GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
42 GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, 44 GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
43 GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, 45 GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
44 GL_MAX_TEXTURE_IMAGE_UNITS, 46 GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
45 GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS}; 47 GL_MAX_TEXTURE_IMAGE_UNITS,
46 48 GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
47constexpr std::array LimitImages = { 49};
50constexpr std::array LIMIT_IMAGES = {
48 GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, 51 GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
49 GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, 52 GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
50 GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS}; 53 GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
54};
51 55
52template <typename T> 56template <typename T>
53T GetInteger(GLenum pname) { 57T GetInteger(GLenum pname) {
@@ -76,8 +80,8 @@ std::vector<std::string_view> GetExtensions() {
76 return extensions; 80 return extensions;
77} 81}
78 82
79bool HasExtension(const std::vector<std::string_view>& images, std::string_view extension) { 83bool HasExtension(std::span<const std::string_view> extensions, std::string_view extension) {
80 return std::find(images.begin(), images.end(), extension) != images.end(); 84 return std::ranges::find(extensions, extension) != extensions.end();
81} 85}
82 86
83u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { 87u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
@@ -91,8 +95,8 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
91 95
92std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept { 96std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
93 std::array<u32, Tegra::Engines::MaxShaderTypes> max; 97 std::array<u32, Tegra::Engines::MaxShaderTypes> max;
94 std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(), 98 std::ranges::transform(LIMIT_UBOS, max.begin(),
95 [](GLenum pname) { return GetInteger<u32>(pname); }); 99 [](GLenum pname) { return GetInteger<u32>(pname); });
96 return max; 100 return max;
97} 101}
98 102
@@ -115,9 +119,10 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
115 for (std::size_t i = 0; i < NumStages; ++i) { 119 for (std::size_t i = 0; i < NumStages; ++i) {
116 const std::size_t stage = stage_swizzle[i]; 120 const std::size_t stage = stage_swizzle[i];
117 bindings[stage] = { 121 bindings[stage] = {
118 Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]), 122 Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
119 Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]), 123 Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
120 Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])}; 124 Extract(base_samplers, num_samplers, total_samplers / NumStages,
125 LIMIT_SAMPLERS[stage])};
121 } 126 }
122 127
123 u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS); 128 u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
@@ -130,7 +135,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
130 135
131 // Reserve at least 4 image bindings on the fragment stage. 136 // Reserve at least 4 image bindings on the fragment stage.
132 bindings[4].image = 137 bindings[4].image =
133 Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]); 138 Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);
134 139
135 // This is guaranteed to be at least 1. 140 // This is guaranteed to be at least 1.
136 const u32 total_extracted_images = num_images / (NumStages - 1); 141 const u32 total_extracted_images = num_images / (NumStages - 1);
@@ -142,7 +147,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
142 continue; 147 continue;
143 } 148 }
144 bindings[stage].image = 149 bindings[stage].image =
145 Extract(base_images, num_images, total_extracted_images, LimitImages[stage]); 150 Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
146 } 151 }
147 152
148 // Compute doesn't care about any of this. 153 // Compute doesn't care about any of this.
@@ -188,6 +193,11 @@ bool IsASTCSupported() {
188 return true; 193 return true;
189} 194}
190 195
196[[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) {
197 const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
198 return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
199}
200
191} // Anonymous namespace 201} // Anonymous namespace
192 202
193Device::Device() 203Device::Device()
@@ -206,9 +216,8 @@ Device::Device()
206 "Beta driver 443.24 is known to have issues. There might be performance issues."); 216 "Beta driver 443.24 is known to have issues. There might be performance issues.");
207 disable_fast_buffer_sub_data = true; 217 disable_fast_buffer_sub_data = true;
208 } 218 }
209 219 uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
210 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); 220 shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
211 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
212 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); 221 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
213 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); 222 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
214 max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); 223 max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
@@ -224,6 +233,7 @@ Device::Device()
224 has_precise_bug = TestPreciseBug(); 233 has_precise_bug = TestPreciseBug();
225 has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; 234 has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
226 has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; 235 has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
236 has_debugging_tool_attached = IsDebugToolAttached(extensions);
227 237
228 // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive 238 // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
229 // uniform buffers as "push constants" 239 // uniform buffers as "push constants"
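
Taking std::span<const std::string_view> lets HasExtension accept any contiguous range without copies, and the parameter is no longer misnamed images. An illustrative call shape after the change (the extension string is an arbitrary example):

    const std::vector<std::string_view> extensions = GetExtensions();
    const bool has_unified_memory =
        HasExtension(extensions, "GL_NV_vertex_buffer_unified_memory");
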
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 8a4b6b9fc..13e66846c 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -36,11 +36,11 @@ public:
36 return GetBaseBindings(static_cast<std::size_t>(shader_type)); 36 return GetBaseBindings(static_cast<std::size_t>(shader_type));
37 } 37 }
38 38
39 std::size_t GetUniformBufferAlignment() const { 39 size_t GetUniformBufferAlignment() const {
40 return uniform_buffer_alignment; 40 return uniform_buffer_alignment;
41 } 41 }
42 42
43 std::size_t GetShaderStorageBufferAlignment() const { 43 size_t GetShaderStorageBufferAlignment() const {
44 return shader_storage_alignment; 44 return shader_storage_alignment;
45 } 45 }
46 46
@@ -104,6 +104,10 @@ public:
104 return has_nv_viewport_array2; 104 return has_nv_viewport_array2;
105 } 105 }
106 106
107 bool HasDebuggingToolAttached() const {
108 return has_debugging_tool_attached;
109 }
110
107 bool UseAssemblyShaders() const { 111 bool UseAssemblyShaders() const {
108 return use_assembly_shaders; 112 return use_assembly_shaders;
109 } 113 }
@@ -118,8 +122,8 @@ private:
118 122
119 std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; 123 std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
120 std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{}; 124 std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
121 std::size_t uniform_buffer_alignment{}; 125 size_t uniform_buffer_alignment{};
122 std::size_t shader_storage_alignment{}; 126 size_t shader_storage_alignment{};
123 u32 max_vertex_attributes{}; 127 u32 max_vertex_attributes{};
124 u32 max_varyings{}; 128 u32 max_varyings{};
125 u32 max_compute_shared_memory_size{}; 129 u32 max_compute_shared_memory_size{};
@@ -135,6 +139,7 @@ private:
135 bool has_precise_bug{}; 139 bool has_precise_bug{};
136 bool has_fast_buffer_sub_data{}; 140 bool has_fast_buffer_sub_data{};
137 bool has_nv_viewport_array2{}; 141 bool has_nv_viewport_array2{};
142 bool has_debugging_tool_attached{};
138 bool use_assembly_shaders{}; 143 bool use_assembly_shaders{};
139 bool use_asynchronous_shaders{}; 144 bool use_asynchronous_shaders{};
140}; 145};
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 6040646cb..3e9c922f5 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -46,7 +46,7 @@ void GLInnerFence::Wait() {
46} 46}
47 47
48FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, 48FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
49 Tegra::GPU& gpu_, TextureCacheOpenGL& texture_cache_, 49 Tegra::GPU& gpu_, TextureCache& texture_cache_,
50 OGLBufferCache& buffer_cache_, QueryCache& query_cache_) 50 OGLBufferCache& buffer_cache_, QueryCache& query_cache_)
51 : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} 51 : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
52 52
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index 39ca6125b..30dbee613 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -33,12 +33,12 @@ private:
33 33
34using Fence = std::shared_ptr<GLInnerFence>; 34using Fence = std::shared_ptr<GLInnerFence>;
35using GenericFenceManager = 35using GenericFenceManager =
36 VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>; 36 VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;
37 37
38class FenceManagerOpenGL final : public GenericFenceManager { 38class FenceManagerOpenGL final : public GenericFenceManager {
39public: 39public:
40 explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, 40 explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
41 TextureCacheOpenGL& texture_cache_, OGLBufferCache& buffer_cache_, 41 TextureCache& texture_cache_, OGLBufferCache& buffer_cache_,
42 QueryCache& query_cache_); 42 QueryCache& query_cache_);
43 43
44protected: 44protected:
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
deleted file mode 100644
index b8a512cb6..000000000
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
+++ /dev/null
@@ -1,85 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <tuple>
6#include <unordered_map>
7#include <utility>
8
9#include <glad/glad.h>
10
11#include "common/common_types.h"
12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
14
15namespace OpenGL {
16
17using Maxwell = Tegra::Engines::Maxwell3D::Regs;
18using VideoCore::Surface::SurfaceType;
19
20FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default;
21
22FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default;
23
24GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) {
25 const auto [entry, is_cache_miss] = cache.try_emplace(key);
26 auto& framebuffer{entry->second};
27 if (is_cache_miss) {
28 framebuffer = CreateFramebuffer(key);
29 }
30 return framebuffer.handle;
31}
32
33OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) {
34 OGLFramebuffer framebuffer;
35 framebuffer.Create();
36
37 // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs.
38 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle);
39
40 if (key.zeta) {
41 const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil;
42 const GLenum attach_target = stencil ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
43 key.zeta->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
44 }
45
46 std::size_t num_buffers = 0;
47 std::array<GLenum, Maxwell::NumRenderTargets> targets;
48
49 for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
50 if (!key.colors[index]) {
51 targets[index] = GL_NONE;
52 continue;
53 }
54 const GLenum attach_target = GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index);
55 key.colors[index]->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
56
57 const u32 attachment = (key.color_attachments >> (BitsPerAttachment * index)) & 0b1111;
58 targets[index] = GL_COLOR_ATTACHMENT0 + attachment;
59 num_buffers = index + 1;
60 }
61
62 if (num_buffers > 0) {
63 glDrawBuffers(static_cast<GLsizei>(num_buffers), std::data(targets));
64 } else {
65 glDrawBuffer(GL_NONE);
66 }
67
68 return framebuffer;
69}
70
71std::size_t FramebufferCacheKey::Hash() const noexcept {
72 std::size_t hash = std::hash<View>{}(zeta);
73 for (const auto& color : colors) {
74 hash ^= std::hash<View>{}(color);
75 }
76 hash ^= static_cast<std::size_t>(color_attachments) << 16;
77 return hash;
78}
79
80bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const noexcept {
81 return std::tie(colors, zeta, color_attachments) ==
82 std::tie(rhs.colors, rhs.zeta, rhs.color_attachments);
83}
84
85} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
deleted file mode 100644
index 8f698fee0..000000000
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h
+++ /dev/null
@@ -1,68 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstddef>
9#include <unordered_map>
10
11#include <glad/glad.h>
12
13#include "common/common_types.h"
14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/renderer_opengl/gl_resource_manager.h"
16#include "video_core/renderer_opengl/gl_texture_cache.h"
17
18namespace OpenGL {
19
20constexpr std::size_t BitsPerAttachment = 4;
21
22struct FramebufferCacheKey {
23 View zeta;
24 std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors;
25 u32 color_attachments = 0;
26
27 std::size_t Hash() const noexcept;
28
29 bool operator==(const FramebufferCacheKey& rhs) const noexcept;
30
31 bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
32 return !operator==(rhs);
33 }
34
35 void SetAttachment(std::size_t index, u32 attachment) {
36 color_attachments |= attachment << (BitsPerAttachment * index);
37 }
38};
39
40} // namespace OpenGL
41
42namespace std {
43
44template <>
45struct hash<OpenGL::FramebufferCacheKey> {
46 std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept {
47 return k.Hash();
48 }
49};
50
51} // namespace std
52
53namespace OpenGL {
54
55class FramebufferCacheOpenGL {
56public:
57 FramebufferCacheOpenGL();
58 ~FramebufferCacheOpenGL();
59
60 GLuint GetFramebuffer(const FramebufferCacheKey& key);
61
62private:
63 OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key);
64
65 std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache;
66};
67
68} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e58e84759..8aa63d329 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -25,12 +25,15 @@
25#include "video_core/engines/maxwell_3d.h" 25#include "video_core/engines/maxwell_3d.h"
26#include "video_core/engines/shader_type.h" 26#include "video_core/engines/shader_type.h"
27#include "video_core/memory_manager.h" 27#include "video_core/memory_manager.h"
28#include "video_core/renderer_opengl/gl_device.h"
28#include "video_core/renderer_opengl/gl_query_cache.h" 29#include "video_core/renderer_opengl/gl_query_cache.h"
29#include "video_core/renderer_opengl/gl_rasterizer.h" 30#include "video_core/renderer_opengl/gl_rasterizer.h"
30#include "video_core/renderer_opengl/gl_shader_cache.h" 31#include "video_core/renderer_opengl/gl_shader_cache.h"
32#include "video_core/renderer_opengl/gl_texture_cache.h"
31#include "video_core/renderer_opengl/maxwell_to_gl.h" 33#include "video_core/renderer_opengl/maxwell_to_gl.h"
32#include "video_core/renderer_opengl/renderer_opengl.h" 34#include "video_core/renderer_opengl/renderer_opengl.h"
33#include "video_core/shader_cache.h" 35#include "video_core/shader_cache.h"
36#include "video_core/texture_cache/texture_cache.h"
34 37
35namespace OpenGL { 38namespace OpenGL {
36 39
@@ -55,18 +58,32 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255
55 58
56namespace { 59namespace {
57 60
58constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18; 61constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
59constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = 62constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
60 NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize; 63 NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
61constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = 64constexpr size_t TOTAL_CONST_BUFFER_BYTES =
62 NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; 65 NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
63 66
64constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; 67constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
65constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; 68constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
69
70constexpr size_t MAX_TEXTURES = 192;
71constexpr size_t MAX_IMAGES = 48;
72
73struct TextureHandle {
74 constexpr TextureHandle(u32 data, bool via_header_index) {
75 const Tegra::Texture::TextureHandle handle{data};
76 image = handle.tic_id;
77 sampler = via_header_index ? image : handle.tsc_id.Value();
78 }
79
80 u32 image;
81 u32 sampler;
82};
66 83
67template <typename Engine, typename Entry> 84template <typename Engine, typename Entry>
68Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, 85TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
69 ShaderType shader_type, std::size_t index = 0) { 86 ShaderType shader_type, size_t index = 0) {
70 if constexpr (std::is_same_v<Entry, SamplerEntry>) { 87 if constexpr (std::is_same_v<Entry, SamplerEntry>) {
71 if (entry.is_separated) { 88 if (entry.is_separated) {
72 const u32 buffer_1 = entry.buffer; 89 const u32 buffer_1 = entry.buffer;
@@ -75,21 +92,16 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
75 const u32 offset_2 = entry.secondary_offset; 92 const u32 offset_2 = entry.secondary_offset;
76 const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); 93 const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
77 const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); 94 const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
78 return engine.GetTextureInfo(handle_1 | handle_2); 95 return TextureHandle(handle_1 | handle_2, via_header_index);
79 } 96 }
80 } 97 }
81 if (entry.is_bindless) { 98 if (entry.is_bindless) {
82 const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); 99 const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
83 return engine.GetTextureInfo(handle); 100 return TextureHandle(raw, via_header_index);
84 }
85
86 const auto& gpu_profile = engine.AccessGuestDriverProfile();
87 const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
88 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
89 return engine.GetStageTexture(shader_type, offset);
90 } else {
91 return engine.GetTexture(offset);
92 } 101 }
102 const u32 buffer = engine.GetBoundBuffer();
103 const u64 offset = (entry.offset + index) * sizeof(u32);
104 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
93} 105}
94 106
95std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, 107std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
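A minimal sketch of the handle decoding introduced above; DecodeSamplerIndex is a hypothetical helper name, and the bitfields are the ones the new TextureHandle struct reads:

    // Decode a raw 32-bit handle under the two sampler binding models.
    u32 DecodeSamplerIndex(u32 raw, bool via_header_index) {
        const Tegra::Texture::TextureHandle handle{raw};
        // ViaHeaderIndex reuses the TIC (texture header) index as the TSC
        // (sampler) index; otherwise the handle carries a separate tsc_id.
        return via_header_index ? handle.tic_id.Value() : handle.tsc_id.Value();
    }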
@@ -97,7 +109,6 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
97 if (!entry.IsIndirect()) { 109 if (!entry.IsIndirect()) {
98 return entry.GetSize(); 110 return entry.GetSize();
99 } 111 }
100
101 if (buffer.size > Maxwell::MaxConstBufferSize) { 112 if (buffer.size > Maxwell::MaxConstBufferSize) {
102 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size, 113 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
103 Maxwell::MaxConstBufferSize); 114 Maxwell::MaxConstBufferSize);
@@ -147,23 +158,60 @@ void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ss
147 reinterpret_cast<const GLuint*>(ssbos)); 158 reinterpret_cast<const GLuint*>(ssbos));
148} 159}
149 160
161ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
162 if (entry.is_buffer) {
163 return ImageViewType::Buffer;
164 }
165 switch (entry.type) {
166 case Tegra::Shader::TextureType::Texture1D:
167 return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
168 case Tegra::Shader::TextureType::Texture2D:
169 return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
170 case Tegra::Shader::TextureType::Texture3D:
171 return ImageViewType::e3D;
172 case Tegra::Shader::TextureType::TextureCube:
173 return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
174 }
175 UNREACHABLE();
176 return ImageViewType::e2D;
177}
178
179ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
180 switch (entry.type) {
181 case Tegra::Shader::ImageType::Texture1D:
182 return ImageViewType::e1D;
183 case Tegra::Shader::ImageType::Texture1DArray:
184 return ImageViewType::e1DArray;
185 case Tegra::Shader::ImageType::Texture2D:
186 return ImageViewType::e2D;
187 case Tegra::Shader::ImageType::Texture2DArray:
188 return ImageViewType::e2DArray;
189 case Tegra::Shader::ImageType::Texture3D:
190 return ImageViewType::e3D;
191 case Tegra::Shader::ImageType::TextureBuffer:
192 return ImageViewType::Buffer;
193 }
194 UNREACHABLE();
195 return ImageViewType::e2D;
196}
197
150} // Anonymous namespace 198} // Anonymous namespace
151 199
152RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, 200RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
153 Core::Memory::Memory& cpu_memory_, const Device& device_, 201 Core::Memory::Memory& cpu_memory_, const Device& device_,
154 ScreenInfo& screen_info_, ProgramManager& program_manager_, 202 ScreenInfo& screen_info_, ProgramManager& program_manager_,
155 StateTracker& state_tracker_) 203 StateTracker& state_tracker_)
156 : RasterizerAccelerated{cpu_memory_}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()), 204 : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
157 kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), 205 kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
158 screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), 206 screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
159 texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker), 207 stream_buffer(device, state_tracker),
208 texture_cache_runtime(device, program_manager, state_tracker),
209 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
160 shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), 210 shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
161 query_cache(*this, maxwell3d, gpu_memory), 211 query_cache(*this, maxwell3d, gpu_memory),
162 buffer_cache(*this, gpu_memory, cpu_memory_, device, STREAM_BUFFER_SIZE), 212 buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker),
163 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), 213 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
164 async_shaders(emu_window_) { 214 async_shaders(emu_window_) {
165 CheckExtensions();
166
167 unified_uniform_buffer.Create(); 215 unified_uniform_buffer.Create();
168 glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0); 216 glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
169 217
@@ -174,7 +222,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
174 nullptr, 0); 222 nullptr, 0);
175 } 223 }
176 } 224 }
177
178 if (device.UseAsynchronousShaders()) { 225 if (device.UseAsynchronousShaders()) {
179 async_shaders.AllocateWorkers(); 226 async_shaders.AllocateWorkers();
180 } 227 }
@@ -186,14 +233,6 @@ RasterizerOpenGL::~RasterizerOpenGL() {
186 } 233 }
187} 234}
188 235
189void RasterizerOpenGL::CheckExtensions() {
190 if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) {
191 LOG_WARNING(
192 Render_OpenGL,
193 "Anisotropic filter is not supported! This can cause graphical issues in some games.");
194 }
195}
196
197void RasterizerOpenGL::SetupVertexFormat() { 236void RasterizerOpenGL::SetupVertexFormat() {
198 auto& flags = maxwell3d.dirty.flags; 237 auto& flags = maxwell3d.dirty.flags;
199 if (!flags[Dirty::VertexFormats]) { 238 if (!flags[Dirty::VertexFormats]) {
@@ -316,10 +355,16 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
316 return info.offset; 355 return info.offset;
317} 356}
318 357
319void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { 358void RasterizerOpenGL::SetupShaders() {
320 MICROPROFILE_SCOPE(OpenGL_Shader); 359 MICROPROFILE_SCOPE(OpenGL_Shader);
321 u32 clip_distances = 0; 360 u32 clip_distances = 0;
322 361
362 std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
363 image_view_indices.clear();
364 sampler_handles.clear();
365
366 texture_cache.SynchronizeGraphicsDescriptors();
367
323 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 368 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
324 const auto& shader_config = maxwell3d.regs.shader_config[index]; 369 const auto& shader_config = maxwell3d.regs.shader_config[index];
325 const auto program{static_cast<Maxwell::ShaderProgram>(index)}; 370 const auto program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -338,7 +383,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
338 } 383 }
339 continue; 384 continue;
340 } 385 }
341
342 // Currently these stages are not supported in the OpenGL backend. 386 // Currently these stages are not supported in the OpenGL backend.
343 // TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL 387 // TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL
344 if (program == Maxwell::ShaderProgram::TesselationControl || 388 if (program == Maxwell::ShaderProgram::TesselationControl ||
@@ -347,7 +391,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
347 } 391 }
348 392
349 Shader* const shader = shader_cache.GetStageProgram(program, async_shaders); 393 Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
350
351 const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; 394 const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
352 switch (program) { 395 switch (program) {
353 case Maxwell::ShaderProgram::VertexA: 396 case Maxwell::ShaderProgram::VertexA:
@@ -363,14 +406,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
363 default: 406 default:
364 UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, 407 UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
365 shader_config.enable.Value(), shader_config.offset); 408 shader_config.enable.Value(), shader_config.offset);
409 break;
366 } 410 }
367 411
368 // Stage indices are 0 - 5 412 // Stage indices are 0 - 5
369 const std::size_t stage = index == 0 ? 0 : index - 1; 413 const size_t stage = index == 0 ? 0 : index - 1;
414 shaders[stage] = shader;
415
370 SetupDrawConstBuffers(stage, shader); 416 SetupDrawConstBuffers(stage, shader);
371 SetupDrawGlobalMemory(stage, shader); 417 SetupDrawGlobalMemory(stage, shader);
372 SetupDrawTextures(stage, shader); 418 SetupDrawTextures(shader, stage);
373 SetupDrawImages(stage, shader); 419 SetupDrawImages(shader, stage);
374 420
375 // Workaround for Intel drivers. 421 // Workaround for Intel drivers.
376 // When a clip distance is enabled but not set in the shader it crops parts of the screen 422 // When a clip distance is enabled but not set in the shader it crops parts of the screen
@@ -384,9 +430,23 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
384 ++index; 430 ++index;
385 } 431 }
386 } 432 }
387
388 SyncClipEnabled(clip_distances); 433 SyncClipEnabled(clip_distances);
389 maxwell3d.dirty.flags[Dirty::Shaders] = false; 434 maxwell3d.dirty.flags[Dirty::Shaders] = false;
435
436 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
437 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
438
439 size_t image_view_index = 0;
440 size_t texture_index = 0;
441 size_t image_index = 0;
442 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
443 const Shader* const shader = shaders[stage];
444 if (shader) {
445 const auto base = device.GetBaseBindings(stage);
446 BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
447 texture_index, image_index);
448 }
449 }
390} 450}
391 451
392std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { 452std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
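The rewritten SetupShaders above splits descriptor handling into gather, batch-resolve, and bind phases; a condensed, illustrative sketch using the commit's own calls (ordering compressed):

    texture_cache.SynchronizeGraphicsDescriptors();   // sync TIC/TSC descriptor state
    // Gather: SetupDrawTextures/SetupDrawImages push TIC indices into
    // image_view_indices and sampler handles into sampler_handles per stage.
    texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); // batch resolve
    // Bind: BindTextures() walks image_view_ids and multibinds per stage.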
@@ -417,98 +477,6 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s
417 shader_cache.LoadDiskCache(title_id, stop_loading, callback); 477 shader_cache.LoadDiskCache(title_id, stop_loading, callback);
418} 478}
419 479
420void RasterizerOpenGL::ConfigureFramebuffers() {
421 MICROPROFILE_SCOPE(OpenGL_Framebuffer);
422 if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
423 return;
424 }
425 maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
426
427 texture_cache.GuardRenderTargets(true);
428
429 View depth_surface = texture_cache.GetDepthBufferSurface(true);
430
431 const auto& regs = maxwell3d.regs;
432 UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
433
434 // Bind the framebuffer surfaces
435 FramebufferCacheKey key;
436 const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
437 for (std::size_t index = 0; index < colors_count; ++index) {
438 View color_surface{texture_cache.GetColorBufferSurface(index, true)};
439 if (!color_surface) {
440 continue;
441 }
442 // Assume that a surface will be written to if it is used as a framebuffer, even
443 // if the shader doesn't actually write to it.
444 texture_cache.MarkColorBufferInUse(index);
445
446 key.SetAttachment(index, regs.rt_control.GetMap(index));
447 key.colors[index] = std::move(color_surface);
448 }
449
450 if (depth_surface) {
451 // Assume that a surface will be written to if it is used as a framebuffer, even if
452 // the shader doesn't actually write to it.
453 texture_cache.MarkDepthBufferInUse();
454 key.zeta = std::move(depth_surface);
455 }
456
457 texture_cache.GuardRenderTargets(false);
458
459 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
460}
461
462void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) {
463 const auto& regs = maxwell3d.regs;
464
465 texture_cache.GuardRenderTargets(true);
466 View color_surface;
467
468 if (using_color) {
469 // Determine if we have to preserve the contents.
470 // First we have to make sure all clear masks are enabled.
471 bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G ||
472 !regs.clear_buffers.B || !regs.clear_buffers.A;
473 const std::size_t index = regs.clear_buffers.RT;
474 if (regs.clear_flags.scissor) {
475 // Then we have to confirm scissor testing clears the whole image.
476 const auto& scissor = regs.scissor_test[0];
477 preserve_contents |= scissor.min_x > 0;
478 preserve_contents |= scissor.min_y > 0;
479 preserve_contents |= scissor.max_x < regs.rt[index].width;
480 preserve_contents |= scissor.max_y < regs.rt[index].height;
481 }
482
483 color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents);
484 texture_cache.MarkColorBufferInUse(index);
485 }
486
487 View depth_surface;
488 if (using_depth_stencil) {
489 bool preserve_contents = false;
490 if (regs.clear_flags.scissor) {
491 // For depth stencil clears we only have to confirm scissor test covers the whole image.
492 const auto& scissor = regs.scissor_test[0];
493 preserve_contents |= scissor.min_x > 0;
494 preserve_contents |= scissor.min_y > 0;
495 preserve_contents |= scissor.max_x < regs.zeta_width;
496 preserve_contents |= scissor.max_y < regs.zeta_height;
497 }
498
499 depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents);
500 texture_cache.MarkDepthBufferInUse();
501 }
502 texture_cache.GuardRenderTargets(false);
503
504 FramebufferCacheKey key;
505 key.colors[0] = std::move(color_surface);
506 key.zeta = std::move(depth_surface);
507
508 state_tracker.NotifyFramebuffer();
509 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
510}
511
512void RasterizerOpenGL::Clear() { 480void RasterizerOpenGL::Clear() {
513 if (!maxwell3d.ShouldExecute()) { 481 if (!maxwell3d.ShouldExecute()) {
514 return; 482 return;
@@ -523,8 +491,9 @@ void RasterizerOpenGL::Clear() {
523 regs.clear_buffers.A) { 491 regs.clear_buffers.A) {
524 use_color = true; 492 use_color = true;
525 493
526 state_tracker.NotifyColorMask0(); 494 const GLuint index = regs.clear_buffers.RT;
527 glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0, 495 state_tracker.NotifyColorMask(index);
496 glColorMaski(index, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
528 regs.clear_buffers.B != 0, regs.clear_buffers.A != 0); 497 regs.clear_buffers.B != 0, regs.clear_buffers.A != 0);
529 498
530 // TODO(Rodrigo): Determine if clamping is used on clears 499 // TODO(Rodrigo): Determine if clamping is used on clears
@@ -557,15 +526,17 @@ void RasterizerOpenGL::Clear() {
557 state_tracker.NotifyScissor0(); 526 state_tracker.NotifyScissor0();
558 glDisablei(GL_SCISSOR_TEST, 0); 527 glDisablei(GL_SCISSOR_TEST, 0);
559 } 528 }
560
561 UNIMPLEMENTED_IF(regs.clear_flags.viewport); 529 UNIMPLEMENTED_IF(regs.clear_flags.viewport);
562 530
563 ConfigureClearFramebuffer(use_color, use_depth || use_stencil); 531 {
532 auto lock = texture_cache.AcquireLock();
533 texture_cache.UpdateRenderTargets(true);
534 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
535 }
564 536
565 if (use_color) { 537 if (use_color) {
566 glClearBufferfv(GL_COLOR, 0, regs.clear_color); 538 glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
567 } 539 }
568
569 if (use_depth && use_stencil) { 540 if (use_depth && use_stencil) {
570 glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); 541 glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
571 } else if (use_depth) { 542 } else if (use_depth) {
@@ -622,16 +593,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
622 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); 593 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
623 594
624 // Prepare the vertex array. 595 // Prepare the vertex array.
625 const bool invalidated = buffer_cache.Map(buffer_size); 596 buffer_cache.Map(buffer_size);
626
627 if (invalidated) {
628 // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
629 auto& dirty = maxwell3d.dirty.flags;
630 dirty[Dirty::VertexBuffers] = true;
631 for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
632 dirty[index] = true;
633 }
634 }
635 597
636 // Prepare vertex array format. 598 // Prepare vertex array format.
637 SetupVertexFormat(); 599 SetupVertexFormat();
@@ -655,22 +617,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
655 } 617 }
656 618
657 // Setup shaders and their used resources. 619 // Setup shaders and their used resources.
658 texture_cache.GuardSamplers(true); 620 auto lock = texture_cache.AcquireLock();
659 const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); 621 SetupShaders();
660 SetupShaders(primitive_mode);
661 texture_cache.GuardSamplers(false);
662
663 ConfigureFramebuffers();
664 622
665 // Signal the buffer cache that we are not going to upload more things. 623 // Signal the buffer cache that we are not going to upload more things.
666 buffer_cache.Unmap(); 624 buffer_cache.Unmap();
667 625 texture_cache.UpdateRenderTargets(false);
626 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
668 program_manager.BindGraphicsPipeline(); 627 program_manager.BindGraphicsPipeline();
669 628
670 if (texture_cache.TextureBarrier()) { 629 const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
671 glTextureBarrier();
672 }
673
674 BeginTransformFeedback(primitive_mode); 630 BeginTransformFeedback(primitive_mode);
675 631
676 const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance); 632 const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
@@ -722,15 +678,13 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
722 buffer_cache.Acquire(); 678 buffer_cache.Acquire();
723 current_cbuf = 0; 679 current_cbuf = 0;
724 680
725 auto kernel = shader_cache.GetComputeKernel(code_addr); 681 Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
726 program_manager.BindCompute(kernel->GetHandle());
727 682
728 SetupComputeTextures(kernel); 683 auto lock = texture_cache.AcquireLock();
729 SetupComputeImages(kernel); 684 BindComputeTextures(kernel);
730 685
731 const std::size_t buffer_size = 686 const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers *
732 Tegra::Engines::KeplerCompute::NumConstBuffers * 687 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
733 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
734 buffer_cache.Map(buffer_size); 688 buffer_cache.Map(buffer_size);
735 689
736 SetupComputeConstBuffers(kernel); 690 SetupComputeConstBuffers(kernel);
@@ -739,7 +693,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
739 buffer_cache.Unmap(); 693 buffer_cache.Unmap();
740 694
741 const auto& launch_desc = kepler_compute.launch_description; 695 const auto& launch_desc = kepler_compute.launch_description;
742 program_manager.BindCompute(kernel->GetHandle());
743 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); 696 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
744 ++num_queued_commands; 697 ++num_queued_commands;
745} 698}
@@ -760,7 +713,10 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
760 if (addr == 0 || size == 0) { 713 if (addr == 0 || size == 0) {
761 return; 714 return;
762 } 715 }
763 texture_cache.FlushRegion(addr, size); 716 {
717 auto lock = texture_cache.AcquireLock();
718 texture_cache.DownloadMemory(addr, size);
719 }
764 buffer_cache.FlushRegion(addr, size); 720 buffer_cache.FlushRegion(addr, size);
765 query_cache.FlushRegion(addr, size); 721 query_cache.FlushRegion(addr, size);
766} 722}
@@ -769,7 +725,8 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
769 if (!Settings::IsGPULevelHigh()) { 725 if (!Settings::IsGPULevelHigh()) {
770 return buffer_cache.MustFlushRegion(addr, size); 726 return buffer_cache.MustFlushRegion(addr, size);
771 } 727 }
772 return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); 728 return texture_cache.IsRegionGpuModified(addr, size) ||
729 buffer_cache.MustFlushRegion(addr, size);
773} 730}
774 731
775void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { 732void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
@@ -777,7 +734,10 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
777 if (addr == 0 || size == 0) { 734 if (addr == 0 || size == 0) {
778 return; 735 return;
779 } 736 }
780 texture_cache.InvalidateRegion(addr, size); 737 {
738 auto lock = texture_cache.AcquireLock();
739 texture_cache.WriteMemory(addr, size);
740 }
781 shader_cache.InvalidateRegion(addr, size); 741 shader_cache.InvalidateRegion(addr, size);
782 buffer_cache.InvalidateRegion(addr, size); 742 buffer_cache.InvalidateRegion(addr, size);
783 query_cache.InvalidateRegion(addr, size); 743 query_cache.InvalidateRegion(addr, size);
@@ -788,18 +748,29 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
788 if (addr == 0 || size == 0) { 748 if (addr == 0 || size == 0) {
789 return; 749 return;
790 } 750 }
791 texture_cache.OnCPUWrite(addr, size); 751 {
752 auto lock = texture_cache.AcquireLock();
753 texture_cache.WriteMemory(addr, size);
754 }
792 shader_cache.OnCPUWrite(addr, size); 755 shader_cache.OnCPUWrite(addr, size);
793 buffer_cache.OnCPUWrite(addr, size); 756 buffer_cache.OnCPUWrite(addr, size);
794} 757}
795 758
796void RasterizerOpenGL::SyncGuestHost() { 759void RasterizerOpenGL::SyncGuestHost() {
797 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 760 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
798 texture_cache.SyncGuestHost();
799 buffer_cache.SyncGuestHost(); 761 buffer_cache.SyncGuestHost();
800 shader_cache.SyncGuestHost(); 762 shader_cache.SyncGuestHost();
801} 763}
802 764
765void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
766 {
767 auto lock = texture_cache.AcquireLock();
768 texture_cache.UnmapMemory(addr, size);
769 }
770 buffer_cache.OnCPUWrite(addr, size);
771 shader_cache.OnCPUWrite(addr, size);
772}
773
803void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { 774void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
804 if (!gpu.IsAsync()) { 775 if (!gpu.IsAsync()) {
805 gpu_memory.Write<u32>(addr, value); 776 gpu_memory.Write<u32>(addr, value);
@@ -841,6 +812,14 @@ void RasterizerOpenGL::WaitForIdle() {
841 GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT); 812 GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
842} 813}
843 814
815void RasterizerOpenGL::FragmentBarrier() {
816 glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT);
817}
818
819void RasterizerOpenGL::TiledCacheBarrier() {
820 glTextureBarrier();
821}
822
844void RasterizerOpenGL::FlushCommands() { 823void RasterizerOpenGL::FlushCommands() {
845 // Only flush when we have commands queued to OpenGL. 824 // Only flush when we have commands queued to OpenGL.
846 if (num_queued_commands == 0) { 825 if (num_queued_commands == 0) {
@@ -854,45 +833,95 @@ void RasterizerOpenGL::TickFrame() {
854 // Ticking a frame means that buffers will be swapped, calling glFlush implicitly. 833 // Ticking a frame means that buffers will be swapped, calling glFlush implicitly.
855 num_queued_commands = 0; 834 num_queued_commands = 0;
856 835
836 fence_manager.TickFrame();
857 buffer_cache.TickFrame(); 837 buffer_cache.TickFrame();
838 {
839 auto lock = texture_cache.AcquireLock();
840 texture_cache.TickFrame();
841 }
858} 842}
859 843
860bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 844bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
861 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 845 const Tegra::Engines::Fermi2D::Surface& dst,
862 const Tegra::Engines::Fermi2D::Config& copy_config) { 846 const Tegra::Engines::Fermi2D::Config& copy_config) {
863 MICROPROFILE_SCOPE(OpenGL_Blits); 847 MICROPROFILE_SCOPE(OpenGL_Blits);
864 texture_cache.DoFermiCopy(src, dst, copy_config); 848 auto lock = texture_cache.AcquireLock();
849 texture_cache.BlitImage(dst, src, copy_config);
865 return true; 850 return true;
866} 851}
867 852
868bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, 853bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
869 VAddr framebuffer_addr, u32 pixel_stride) { 854 VAddr framebuffer_addr, u32 pixel_stride) {
870 if (!framebuffer_addr) { 855 if (framebuffer_addr == 0) {
871 return {}; 856 return false;
872 } 857 }
873
874 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 858 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
875 859
876 const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; 860 auto lock = texture_cache.AcquireLock();
877 if (!surface) { 861 ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)};
878 return {}; 862 if (!image_view) {
863 return false;
879 } 864 }
880
881 // Verify that the cached surface is the same size and format as the requested framebuffer 865 // Verify that the cached surface is the same size and format as the requested framebuffer
882 const auto& params{surface->GetSurfaceParams()}; 866 // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
883 const auto& pixel_format{ 867 // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
884 VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
885 ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
886 ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
887 868
888 if (params.pixel_format != pixel_format) { 869 screen_info.display_texture = image_view->Handle(ImageViewType::e2D);
889 LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different"); 870 screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
890 } 871 return true;
872}
891 873
892 screen_info.display_texture = surface->GetTexture(); 874void RasterizerOpenGL::BindComputeTextures(Shader* kernel) {
893 screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion; 875 image_view_indices.clear();
876 sampler_handles.clear();
894 877
895 return true; 878 texture_cache.SynchronizeComputeDescriptors();
879
880 SetupComputeTextures(kernel);
881 SetupComputeImages(kernel);
882
883 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
884 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
885
886 program_manager.BindCompute(kernel->GetHandle());
887 size_t image_view_index = 0;
888 size_t texture_index = 0;
889 size_t image_index = 0;
890 BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index);
891}
892
893void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture,
894 GLuint base_image, size_t& image_view_index,
895 size_t& texture_index, size_t& image_index) {
896 const GLuint* const samplers = sampler_handles.data() + texture_index;
897 const GLuint* const textures = texture_handles.data() + texture_index;
898 const GLuint* const images = image_handles.data() + image_index;
899
900 const size_t num_samplers = entries.samplers.size();
901 for (const auto& sampler : entries.samplers) {
902 for (size_t i = 0; i < sampler.size; ++i) {
903 const ImageViewId image_view_id = image_view_ids[image_view_index++];
904 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
905 const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler));
906 texture_handles[texture_index++] = handle;
907 }
908 }
909 const size_t num_images = entries.images.size();
910 for (size_t unit = 0; unit < num_images; ++unit) {
911 // TODO: Mark as modified
912 const ImageViewId image_view_id = image_view_ids[image_view_index++];
913 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
914 const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit]));
915 image_handles[image_index] = handle;
916 ++image_index;
917 }
918 if (num_samplers > 0) {
919 glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers);
920 glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures);
921 }
922 if (num_images > 0) {
923 glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images);
924 }
896} 925}
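BindTextures above relies on ARB_multi_bind, which binds a contiguous range of units in one call instead of a per-unit loop; an illustrative snippet with hypothetical texture names:

    const GLuint first_unit = 0;                        // assumed base binding
    const std::array<GLuint, 2> handles{tex_a, tex_b};  // hypothetical texture names
    glBindTextures(first_unit, static_cast<GLsizei>(handles.size()), handles.data());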
897 926
898void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { 927void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
@@ -999,7 +1028,6 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
999 GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, 1028 GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
1000 GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, 1029 GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
1001 }; 1030 };
1002
1003 const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; 1031 const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
1004 const auto& entries{shader->GetEntries().global_memory_entries}; 1032 const auto& entries{shader->GetEntries().global_memory_entries};
1005 1033
@@ -1056,77 +1084,53 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e
1056 } 1084 }
1057} 1085}
1058 1086
1059void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { 1087void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
1060 MICROPROFILE_SCOPE(OpenGL_Texture); 1088 const bool via_header_index =
1061 u32 binding = device.GetBaseBindings(stage_index).sampler; 1089 maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1062 for (const auto& entry : shader->GetEntries().samplers) { 1090 for (const auto& entry : shader->GetEntries().samplers) {
1063 const auto shader_type = static_cast<ShaderType>(stage_index); 1091 const auto shader_type = static_cast<ShaderType>(stage_index);
1064 for (std::size_t i = 0; i < entry.size; ++i) { 1092 for (size_t index = 0; index < entry.size; ++index) {
1065 const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); 1093 const auto handle =
1066 SetupTexture(binding++, texture, entry); 1094 GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index);
1095 const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
1096 sampler_handles.push_back(sampler->Handle());
1097 image_view_indices.push_back(handle.image);
1067 } 1098 }
1068 } 1099 }
1069} 1100}
1070 1101
1071void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) { 1102void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) {
1072 MICROPROFILE_SCOPE(OpenGL_Texture); 1103 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1073 u32 binding = 0;
1074 for (const auto& entry : kernel->GetEntries().samplers) { 1104 for (const auto& entry : kernel->GetEntries().samplers) {
1075 for (std::size_t i = 0; i < entry.size; ++i) { 1105 for (size_t i = 0; i < entry.size; ++i) {
1076 const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i); 1106 const auto handle =
1077 SetupTexture(binding++, texture, entry); 1107 GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i);
1108 const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
1109 sampler_handles.push_back(sampler->Handle());
1110 image_view_indices.push_back(handle.image);
1078 } 1111 }
1079 } 1112 }
1080} 1113}
1081 1114
1082void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, 1115void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) {
1083 const SamplerEntry& entry) { 1116 const bool via_header_index =
1084 const auto view = texture_cache.GetTextureSurface(texture.tic, entry); 1117 maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1085 if (!view) {
1086 // Can occur when texture addr is null or its memory is unmapped/invalid
1087 glBindSampler(binding, 0);
1088 glBindTextureUnit(binding, 0);
1089 return;
1090 }
1091 const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source,
1092 texture.tic.z_source, texture.tic.w_source);
1093 glBindTextureUnit(binding, handle);
1094 if (!view->GetSurfaceParams().IsBuffer()) {
1095 glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
1096 }
1097}
1098
1099void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
1100 u32 binding = device.GetBaseBindings(stage_index).image;
1101 for (const auto& entry : shader->GetEntries().images) { 1118 for (const auto& entry : shader->GetEntries().images) {
1102 const auto shader_type = static_cast<ShaderType>(stage_index); 1119 const auto shader_type = static_cast<ShaderType>(stage_index);
1103 const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; 1120 const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type);
1104 SetupImage(binding++, tic, entry); 1121 image_view_indices.push_back(handle.image);
1105 } 1122 }
1106} 1123}
1107 1124
1108void RasterizerOpenGL::SetupComputeImages(Shader* shader) { 1125void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
1109 u32 binding = 0; 1126 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1110 for (const auto& entry : shader->GetEntries().images) { 1127 for (const auto& entry : shader->GetEntries().images) {
1111 const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic; 1128 const auto handle =
1112 SetupImage(binding++, tic, entry); 1129 GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute);
1130 image_view_indices.push_back(handle.image);
1113 } 1131 }
1114} 1132}
1115 1133
1116void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
1117 const ImageEntry& entry) {
1118 const auto view = texture_cache.GetImageSurface(tic, entry);
1119 if (!view) {
1120 glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
1121 return;
1122 }
1123 if (entry.is_written) {
1124 view->MarkAsModified(texture_cache.Tick());
1125 }
1126 const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
1127 glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat());
1128}
1129
1130void RasterizerOpenGL::SyncViewport() { 1134void RasterizerOpenGL::SyncViewport() {
1131 auto& flags = maxwell3d.dirty.flags; 1135 auto& flags = maxwell3d.dirty.flags;
1132 const auto& regs = maxwell3d.regs; 1136 const auto& regs = maxwell3d.regs;
@@ -1526,17 +1530,9 @@ void RasterizerOpenGL::SyncPointState() {
1526 flags[Dirty::PointSize] = false; 1530 flags[Dirty::PointSize] = false;
1527 1531
1528 oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); 1532 oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable);
1533 oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable);
1529 1534
1530 if (maxwell3d.regs.vp_point_size.enable) {
1531 // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled.
1532 glEnable(GL_PROGRAM_POINT_SIZE);
1533 return;
1534 }
1535
1536 // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
1537 // in OpenGL).
1538 glPointSize(std::max(1.0f, maxwell3d.regs.point_size)); 1535 glPointSize(std::max(1.0f, maxwell3d.regs.point_size));
1539 glDisable(GL_PROGRAM_POINT_SIZE);
1540} 1536}
1541 1537
1542void RasterizerOpenGL::SyncLineState() { 1538void RasterizerOpenGL::SyncLineState() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index de28cff15..82e03e677 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -7,12 +7,13 @@
7#include <array> 7#include <array>
8#include <atomic> 8#include <atomic>
9#include <cstddef> 9#include <cstddef>
10#include <map>
11#include <memory> 10#include <memory>
12#include <optional> 11#include <optional>
13#include <tuple> 12#include <tuple>
14#include <utility> 13#include <utility>
15 14
15#include <boost/container/static_vector.hpp>
16
16#include <glad/glad.h> 17#include <glad/glad.h>
17 18
18#include "common/common_types.h" 19#include "common/common_types.h"
@@ -23,16 +24,14 @@
23#include "video_core/renderer_opengl/gl_buffer_cache.h" 24#include "video_core/renderer_opengl/gl_buffer_cache.h"
24#include "video_core/renderer_opengl/gl_device.h" 25#include "video_core/renderer_opengl/gl_device.h"
25#include "video_core/renderer_opengl/gl_fence_manager.h" 26#include "video_core/renderer_opengl/gl_fence_manager.h"
26#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
27#include "video_core/renderer_opengl/gl_query_cache.h" 27#include "video_core/renderer_opengl/gl_query_cache.h"
28#include "video_core/renderer_opengl/gl_resource_manager.h" 28#include "video_core/renderer_opengl/gl_resource_manager.h"
29#include "video_core/renderer_opengl/gl_sampler_cache.h"
30#include "video_core/renderer_opengl/gl_shader_cache.h" 29#include "video_core/renderer_opengl/gl_shader_cache.h"
31#include "video_core/renderer_opengl/gl_shader_decompiler.h" 30#include "video_core/renderer_opengl/gl_shader_decompiler.h"
32#include "video_core/renderer_opengl/gl_shader_manager.h" 31#include "video_core/renderer_opengl/gl_shader_manager.h"
33#include "video_core/renderer_opengl/gl_state_tracker.h" 32#include "video_core/renderer_opengl/gl_state_tracker.h"
33#include "video_core/renderer_opengl/gl_stream_buffer.h"
34#include "video_core/renderer_opengl/gl_texture_cache.h" 34#include "video_core/renderer_opengl/gl_texture_cache.h"
35#include "video_core/renderer_opengl/utils.h"
36#include "video_core/shader/async_shaders.h" 35#include "video_core/shader/async_shaders.h"
37#include "video_core/textures/texture.h" 36#include "video_core/textures/texture.h"
38 37
@@ -51,7 +50,7 @@ class MemoryManager;
51namespace OpenGL { 50namespace OpenGL {
52 51
53struct ScreenInfo; 52struct ScreenInfo;
54struct DrawParameters; 53struct ShaderEntries;
55 54
56struct BindlessSSBO { 55struct BindlessSSBO {
57 GLuint64EXT address; 56 GLuint64EXT address;
@@ -79,15 +78,18 @@ public:
79 void InvalidateRegion(VAddr addr, u64 size) override; 78 void InvalidateRegion(VAddr addr, u64 size) override;
80 void OnCPUWrite(VAddr addr, u64 size) override; 79 void OnCPUWrite(VAddr addr, u64 size) override;
81 void SyncGuestHost() override; 80 void SyncGuestHost() override;
81 void UnmapMemory(VAddr addr, u64 size) override;
82 void SignalSemaphore(GPUVAddr addr, u32 value) override; 82 void SignalSemaphore(GPUVAddr addr, u32 value) override;
83 void SignalSyncPoint(u32 value) override; 83 void SignalSyncPoint(u32 value) override;
84 void ReleaseFences() override; 84 void ReleaseFences() override;
85 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 85 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
86 void WaitForIdle() override; 86 void WaitForIdle() override;
87 void FragmentBarrier() override;
88 void TiledCacheBarrier() override;
87 void FlushCommands() override; 89 void FlushCommands() override;
88 void TickFrame() override; 90 void TickFrame() override;
89 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 91 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
90 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 92 const Tegra::Engines::Fermi2D::Surface& dst,
91 const Tegra::Engines::Fermi2D::Config& copy_config) override; 93 const Tegra::Engines::Fermi2D::Config& copy_config) override;
92 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 94 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
93 u32 pixel_stride) override; 95 u32 pixel_stride) override;
@@ -108,11 +110,14 @@ public:
108 } 110 }
109 111
110private: 112private:
111 /// Configures the color and depth framebuffer states. 113 static constexpr size_t MAX_TEXTURES = 192;
112 void ConfigureFramebuffers(); 114 static constexpr size_t MAX_IMAGES = 48;
115 static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
116
117 void BindComputeTextures(Shader* kernel);
113 118
114 /// Configures the color and depth framebuffer for clearing. 119 void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
115 void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil); 120 size_t& image_view_index, size_t& texture_index, size_t& image_index);
116 121
117 /// Configures the current constbuffers to use for the draw command. 122 /// Configures the current constbuffers to use for the draw command.
118 void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader); 123 void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
@@ -136,23 +141,16 @@ private:
136 size_t size, BindlessSSBO* ssbo); 141 size_t size, BindlessSSBO* ssbo);
137 142
138 /// Configures the current textures to use for the draw command. 143 /// Configures the current textures to use for the draw command.
139 void SetupDrawTextures(std::size_t stage_index, Shader* shader); 144 void SetupDrawTextures(const Shader* shader, size_t stage_index);
140 145
141 /// Configures the textures used in a compute shader. 146 /// Configures the textures used in a compute shader.
142 void SetupComputeTextures(Shader* kernel); 147 void SetupComputeTextures(const Shader* kernel);
143
144 /// Configures a texture.
145 void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
146 const SamplerEntry& entry);
147 148
148 /// Configures images in a graphics shader. 149 /// Configures images in a graphics shader.
149 void SetupDrawImages(std::size_t stage_index, Shader* shader); 150 void SetupDrawImages(const Shader* shader, size_t stage_index);
150 151
151 /// Configures images in a compute shader. 152 /// Configures images in a compute shader.
152 void SetupComputeImages(Shader* shader); 153 void SetupComputeImages(const Shader* shader);
153
154 /// Configures an image.
155 void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
156 154
157 /// Syncs the viewport and depth range to match the guest state 155 /// Syncs the viewport and depth range to match the guest state
158 void SyncViewport(); 156 void SyncViewport();
@@ -227,9 +225,6 @@ private:
227 /// End a transform feedback 225 /// End a transform feedback
228 void EndTransformFeedback(); 226 void EndTransformFeedback();
229 227
230 /// Check for extension that are not strictly required but are needed for correct emulation
231 void CheckExtensions();
232
233 std::size_t CalculateVertexArraysSize() const; 228 std::size_t CalculateVertexArraysSize() const;
234 229
235 std::size_t CalculateIndexBufferSize() const; 230 std::size_t CalculateIndexBufferSize() const;
@@ -242,7 +237,7 @@ private:
242 237
243 GLintptr SetupIndexBuffer(); 238 GLintptr SetupIndexBuffer();
244 239
245 void SetupShaders(GLenum primitive_mode); 240 void SetupShaders();
246 241
247 Tegra::GPU& gpu; 242 Tegra::GPU& gpu;
248 Tegra::Engines::Maxwell3D& maxwell3d; 243 Tegra::Engines::Maxwell3D& maxwell3d;
@@ -254,19 +249,21 @@ private:
254 ProgramManager& program_manager; 249 ProgramManager& program_manager;
255 StateTracker& state_tracker; 250 StateTracker& state_tracker;
256 251
257 TextureCacheOpenGL texture_cache; 252 OGLStreamBuffer stream_buffer;
253 TextureCacheRuntime texture_cache_runtime;
254 TextureCache texture_cache;
258 ShaderCacheOpenGL shader_cache; 255 ShaderCacheOpenGL shader_cache;
259 SamplerCacheOpenGL sampler_cache;
260 FramebufferCacheOpenGL framebuffer_cache;
261 QueryCache query_cache; 256 QueryCache query_cache;
262 OGLBufferCache buffer_cache; 257 OGLBufferCache buffer_cache;
263 FenceManagerOpenGL fence_manager; 258 FenceManagerOpenGL fence_manager;
264 259
265 VideoCommon::Shader::AsyncShaders async_shaders; 260 VideoCommon::Shader::AsyncShaders async_shaders;
266 261
267 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; 262 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
268 263 std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
269 GLint vertex_binding = 0; 264 boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
265 std::array<GLuint, MAX_TEXTURES> texture_handles;
266 std::array<GLuint, MAX_IMAGES> image_handles;
270 267
271 std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> 268 std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
272 transform_feedback_buffers; 269 transform_feedback_buffers;
@@ -280,7 +277,7 @@ private:
280 std::size_t current_cbuf = 0; 277 std::size_t current_cbuf = 0;
281 OGLBuffer unified_uniform_buffer; 278 OGLBuffer unified_uniform_buffer;
282 279
283 /// Number of commands queued to the OpenGL driver. Reseted on flush. 280 /// Number of commands queued to the OpenGL driver. Reset on flush.
284 std::size_t num_queued_commands = 0; 281 std::size_t num_queued_commands = 0;
285 282
286 u32 last_clip_distance_mask = 0; 283 u32 last_clip_distance_mask = 0;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 0ebcec427..0e34a0f20 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -71,7 +71,7 @@ void OGLSampler::Create() {
71 return; 71 return;
72 72
73 MICROPROFILE_SCOPE(OpenGL_ResourceCreation); 73 MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
74 glGenSamplers(1, &handle); 74 glCreateSamplers(1, &handle);
75} 75}
76 76
77void OGLSampler::Release() { 77void OGLSampler::Release() {
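The switch to glCreateSamplers matters for direct state access: glGenSamplers only reserves a name until the first bind, while glCreateSamplers returns a fully initialized object that can be configured immediately:

    GLuint sampler = 0;
    glCreateSamplers(1, &sampler);
    glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR); // valid right away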
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp
deleted file mode 100644
index 5c174879a..000000000
--- a/src/video_core/renderer_opengl/gl_sampler_cache.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/logging/log.h"
6#include "video_core/renderer_opengl/gl_resource_manager.h"
7#include "video_core/renderer_opengl/gl_sampler_cache.h"
8#include "video_core/renderer_opengl/maxwell_to_gl.h"
9
10namespace OpenGL {
11
12SamplerCacheOpenGL::SamplerCacheOpenGL() = default;
13
14SamplerCacheOpenGL::~SamplerCacheOpenGL() = default;
15
16OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
17 OGLSampler sampler;
18 sampler.Create();
19
20 const GLuint sampler_id{sampler.handle};
21 glSamplerParameteri(
22 sampler_id, GL_TEXTURE_MAG_FILTER,
23 MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None));
24 glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER,
25 MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter));
26 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u));
27 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v));
28 glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p));
29 glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE,
30 tsc.depth_compare_enabled == 1 ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE);
31 glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC,
32 MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func));
33 glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data());
34 glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod());
35 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod());
36 glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias());
37 if (GLAD_GL_ARB_texture_filter_anisotropic) {
38 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy());
39 } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
40 glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy());
41 } else {
42 LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver");
43 }
44
45 return sampler;
46}
47
48GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const {
49 return sampler.handle;
50}
51
52} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h
deleted file mode 100644
index 34ee37f00..000000000
--- a/src/video_core/renderer_opengl/gl_sampler_cache.h
+++ /dev/null
@@ -1,25 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <glad/glad.h>
8
9#include "video_core/renderer_opengl/gl_resource_manager.h"
10#include "video_core/sampler_cache.h"
11
12namespace OpenGL {
13
14class SamplerCacheOpenGL final : public VideoCommon::SamplerCache<GLuint, OGLSampler> {
15public:
16 explicit SamplerCacheOpenGL();
17 ~SamplerCacheOpenGL();
18
19protected:
20 OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
21
22 GLuint ToSamplerType(const OGLSampler& sampler) const override;
23};
24
25} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index eabfdea5d..d4841fdb7 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -27,7 +27,6 @@
27#include "video_core/renderer_opengl/gl_shader_decompiler.h" 27#include "video_core/renderer_opengl/gl_shader_decompiler.h"
28#include "video_core/renderer_opengl/gl_shader_disk_cache.h" 28#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
29#include "video_core/renderer_opengl/gl_state_tracker.h" 29#include "video_core/renderer_opengl/gl_state_tracker.h"
30#include "video_core/renderer_opengl/utils.h"
31#include "video_core/shader/memory_util.h" 30#include "video_core/shader/memory_util.h"
32#include "video_core/shader/registry.h" 31#include "video_core/shader/registry.h"
33#include "video_core/shader/shader_ir.h" 32#include "video_core/shader/shader_ir.h"
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index ccbdfe967..2e1fa252d 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -38,11 +38,9 @@ using Tegra::Shader::IpaSampleMode;
38using Tegra::Shader::PixelImap; 38using Tegra::Shader::PixelImap;
39using Tegra::Shader::Register; 39using Tegra::Shader::Register;
40using Tegra::Shader::TextureType; 40using Tegra::Shader::TextureType;
41using VideoCommon::Shader::BuildTransformFeedback;
42using VideoCommon::Shader::Registry;
43 41
44using namespace std::string_literals;
45using namespace VideoCommon::Shader; 42using namespace VideoCommon::Shader;
43using namespace std::string_literals;
46 44
47using Maxwell = Tegra::Engines::Maxwell3D::Regs; 45using Maxwell = Tegra::Engines::Maxwell3D::Regs;
48using Operation = const OperationNode&; 46using Operation = const OperationNode&;
@@ -2753,11 +2751,11 @@ private:
2753 } 2751 }
2754 } 2752 }
2755 2753
2756 std::string GetSampler(const Sampler& sampler) const { 2754 std::string GetSampler(const SamplerEntry& sampler) const {
2757 return AppendSuffix(sampler.index, "sampler"); 2755 return AppendSuffix(sampler.index, "sampler");
2758 } 2756 }
2759 2757
2760 std::string GetImage(const Image& image) const { 2758 std::string GetImage(const ImageEntry& image) const {
2761 return AppendSuffix(image.index, "image"); 2759 return AppendSuffix(image.index, "image");
2762 } 2760 }
2763 2761
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index c4ff47875..be68994bb 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -20,8 +20,8 @@ namespace OpenGL {
20class Device; 20class Device;
21 21
22using Maxwell = Tegra::Engines::Maxwell3D::Regs; 22using Maxwell = Tegra::Engines::Maxwell3D::Regs;
23using SamplerEntry = VideoCommon::Shader::Sampler; 23using SamplerEntry = VideoCommon::Shader::SamplerEntry;
24using ImageEntry = VideoCommon::Shader::Image; 24using ImageEntry = VideoCommon::Shader::ImageEntry;
25 25
26class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { 26class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
27public: 27public:
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 691c6c79b..553e6e8d6 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -83,6 +83,21 @@ void ProgramManager::RestoreGuestPipeline() {
83 } 83 }
84} 84}
85 85
86void ProgramManager::BindHostCompute(GLuint program) {
87 if (use_assembly_programs) {
88 glDisable(GL_COMPUTE_PROGRAM_NV);
89 }
90 glUseProgram(program);
91 is_graphics_bound = false;
92}
93
94void ProgramManager::RestoreGuestCompute() {
95 if (use_assembly_programs) {
96 glEnable(GL_COMPUTE_PROGRAM_NV);
97 glUseProgram(0);
98 }
99}
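A hedged usage sketch for the new pair; 'utility_shader' is a hypothetical host-owned compute program, such as one the texture cache runtime dispatches outside guest state:

    program_manager.BindHostCompute(utility_shader.handle); // bypass guest bindings
    glDispatchCompute(num_groups_x, num_groups_y, num_groups_z);
    program_manager.RestoreGuestCompute();                  // rewind for guest state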
100
86void ProgramManager::UseVertexShader(GLuint program) { 101void ProgramManager::UseVertexShader(GLuint program) {
87 if (use_assembly_programs) { 102 if (use_assembly_programs) {
88 BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); 103 BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 950e0dfcb..ad42cce74 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -45,6 +45,12 @@ public:
     /// Rewinds BindHostPipeline state changes.
     void RestoreGuestPipeline();
 
+    /// Binds an OpenGL GLSL program object unsynchronized with the guest state.
+    void BindHostCompute(GLuint program);
+
+    /// Rewinds BindHostCompute state changes.
+    void RestoreGuestCompute();
+
     void UseVertexShader(GLuint program);
     void UseGeometryShader(GLuint program);
     void UseFragmentShader(GLuint program);
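Note: the new BindHostCompute/RestoreGuestCompute pair mirrors the existing BindHostPipeline/RestoreGuestPipeline bracket, but for compute work: with assembly programs in use, GL_COMPUTE_PROGRAM_NV is disabled before the GLSL program is bound and re-enabled afterwards. A minimal usage sketch follows; the caller and dispatch parameters are assumptions, not part of this commit:

    // Hypothetical host-side compute pass:
    program_manager.BindHostCompute(compute_program.handle); // unbinds guest assembly compute state
    glDispatchCompute(num_groups_x, num_groups_y, num_groups_z);
    program_manager.RestoreGuestCompute();                   // restores guest compute state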
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index 45f4fc565..60e6fa39f 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -249,4 +249,11 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
     }
 }
 
+void StateTracker::InvalidateStreamBuffer() {
+    flags[Dirty::VertexBuffers] = true;
+    for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
+        flags[index] = true;
+    }
+}
+
 } // namespace OpenGL
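Note: InvalidateStreamBuffer dirties the aggregate VertexBuffers flag along with all 32 per-binding flags, so the next draw rebinds every vertex buffer that may still point into orphaned stream-buffer storage. A sketch of how a consumer of these flags could react, assuming the rasterizer's usual clear-and-rebind loop (hypothetical code, not part of this commit):

    if (flags[Dirty::VertexBuffers]) {
        flags[Dirty::VertexBuffers] = false;
        for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
            if (!flags[index]) {
                continue;
            }
            flags[index] = false;
            const int binding = index - Dirty::VertexBuffer0;
            // Rebind vertex buffer 'binding' against the new storage here.
        }
    }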
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 9d127548f..574615d3c 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -92,6 +92,8 @@ class StateTracker {
 public:
     explicit StateTracker(Tegra::GPU& gpu);
 
+    void InvalidateStreamBuffer();
+
     void BindIndexBuffer(GLuint new_index_buffer) {
         if (index_buffer == new_index_buffer) {
             return;
@@ -100,6 +102,14 @@ public:
         glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer);
     }
 
+    void BindFramebuffer(GLuint new_framebuffer) {
+        if (framebuffer == new_framebuffer) {
+            return;
+        }
+        framebuffer = new_framebuffer;
+        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer);
+    }
+
     void NotifyScreenDrawVertexArray() {
         flags[OpenGL::Dirty::VertexFormats] = true;
         flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
@@ -129,9 +139,9 @@ public:
         flags[OpenGL::Dirty::Scissor0] = true;
     }
 
-    void NotifyColorMask0() {
+    void NotifyColorMask(size_t index) {
         flags[OpenGL::Dirty::ColorMasks] = true;
-        flags[OpenGL::Dirty::ColorMask0] = true;
+        flags[OpenGL::Dirty::ColorMask0 + index] = true;
     }
 
     void NotifyBlend0() {
@@ -190,6 +200,7 @@ public:
 private:
     Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
 
+    GLuint framebuffer = 0;
     GLuint index_buffer = 0;
 };
 
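Note: BindFramebuffer adds the same redundant-bind elision already used by BindIndexBuffer, caching the handle in the new 'framebuffer' member, and NotifyColorMask(size_t index) generalizes the old NotifyColorMask0 to any render target by offsetting ColorMask0. A short sketch of the caching behavior (caller names are assumptions, not part of this commit):

    state_tracker.BindFramebuffer(fbo); // first call issues glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo)
    state_tracker.BindFramebuffer(fbo); // same handle again: early return, no GL call
    state_tracker.NotifyColorMask(3);   // dirties ColorMasks and ColorMask0 + 3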
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index 887995cf4..e0819cdf2 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -9,6 +9,7 @@
 #include "common/assert.h"
 #include "common/microprofile.h"
 #include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_state_tracker.h"
 #include "video_core/renderer_opengl/gl_stream_buffer.h"
 
 MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
@@ -16,24 +17,14 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
 
 namespace OpenGL {
 
-OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage)
-    : buffer_size(size) {
+OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_)
+    : state_tracker{state_tracker_} {
     gl_buffer.Create();
 
-    GLsizeiptr allocate_size = size;
-    if (vertex_data_usage) {
-        // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer
-        // read position is near the end and is an out-of-bound access to the vertex buffer. This is
-        // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
-        // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the
-        // crash.
-        allocate_size *= 2;
-    }
-
     static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
-    glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
+    glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags);
     mapped_ptr = static_cast<u8*>(
-        glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
+        glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
 
     if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
         glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
@@ -46,25 +37,24 @@ OGLStreamBuffer::~OGLStreamBuffer() {
     gl_buffer.Release();
 }
 
-std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
-    ASSERT(size <= buffer_size);
-    ASSERT(alignment <= buffer_size);
+std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
+    ASSERT(size <= BUFFER_SIZE);
+    ASSERT(alignment <= BUFFER_SIZE);
     mapped_size = size;
 
     if (alignment > 0) {
         buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
     }
 
-    bool invalidate = false;
-    if (buffer_pos + size > buffer_size) {
+    if (buffer_pos + size > BUFFER_SIZE) {
         MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
         glInvalidateBufferData(gl_buffer.handle);
+        state_tracker.InvalidateStreamBuffer();
 
         buffer_pos = 0;
-        invalidate = true;
     }
 
-    return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate);
+    return std::make_pair(mapped_ptr + buffer_pos, buffer_pos);
 }
 
 void OGLStreamBuffer::Unmap(GLsizeiptr size) {
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index 307a67113..dd9cf67eb 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -4,29 +4,31 @@
 
 #pragma once
 
-#include <tuple>
+#include <utility>
+
 #include <glad/glad.h>
+
 #include "common/common_types.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 
 namespace OpenGL {
 
 class Device;
+class StateTracker;
 
 class OGLStreamBuffer : private NonCopyable {
 public:
-    explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage);
+    explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_);
     ~OGLStreamBuffer();
 
     /*
      * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
      * and the optional alignment requirement.
      * If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
-     * The return values are the pointer to the new chunk, the offset within the buffer,
-     * and the invalidation flag for previous chunks.
+     * The return values are the pointer to the new chunk, and the offset within the buffer.
      * The actual used size must be specified on unmapping the chunk.
      */
-    std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0);
+    std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0);
 
     void Unmap(GLsizeiptr size);
 
@@ -39,15 +41,18 @@ public:
     }
 
     GLsizeiptr Size() const noexcept {
-        return buffer_size;
+        return BUFFER_SIZE;
     }
 
 private:
+    static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024;
+
+    StateTracker& state_tracker;
+
     OGLBuffer gl_buffer;
 
     GLuint64EXT gpu_address = 0;
     GLintptr buffer_pos = 0;
-    GLsizeiptr buffer_size = 0;
     GLsizeiptr mapped_size = 0;
     u8* mapped_ptr = nullptr;
 };
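Note: with the stream buffer now a fixed 256 MiB allocation, Map no longer returns an invalidation flag; a wrap-around is reported through StateTracker::InvalidateStreamBuffer instead. A migration sketch for callers of the new API, assuming a plain memcpy upload (hypothetical names, not part of this commit):

    const auto [dest, offset] = stream_buffer.Map(static_cast<GLsizeiptr>(size), alignment);
    std::memcpy(dest, src_data, size); // write into the persistently mapped chunk
    stream_buffer.Unmap(static_cast<GLsizeiptr>(size));
    // 'offset' locates the chunk inside the buffer; the old third tuple
    // element ('invalidate') has no replacement at the call site.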
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index daf352b50..4c690418c 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -2,173 +2,238 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include "common/assert.h"
-#include "common/bit_util.h"
-#include "common/common_types.h"
-#include "common/microprofile.h"
-#include "common/scope_exit.h"
-#include "core/core.h"
-#include "video_core/morton.h"
-#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include <algorithm>
+#include <array>
+#include <bit>
+#include <string>
+
+#include <glad/glad.h>
+
+#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/gl_state_tracker.h"
 #include "video_core/renderer_opengl/gl_texture_cache.h"
-#include "video_core/renderer_opengl/utils.h"
-#include "video_core/texture_cache/surface_base.h"
+#include "video_core/renderer_opengl/maxwell_to_gl.h"
+#include "video_core/renderer_opengl/util_shaders.h"
+#include "video_core/surface.h"
+#include "video_core/texture_cache/format_lookup_table.h"
+#include "video_core/texture_cache/samples_helper.h"
 #include "video_core/texture_cache/texture_cache.h"
-#include "video_core/textures/convert.h"
-#include "video_core/textures/texture.h"
+#include "video_core/textures/decoders.h"
 
 namespace OpenGL {
 
-using Tegra::Texture::SwizzleSource;
-using VideoCore::MortonSwizzleMode;
+namespace {
 
+using Tegra::Texture::SwizzleSource;
+using Tegra::Texture::TextureMipmapFilter;
+using Tegra::Texture::TextureType;
+using Tegra::Texture::TICEntry;
+using Tegra::Texture::TSCEntry;
+using VideoCommon::CalculateLevelStrideAlignment;
+using VideoCommon::ImageCopy;
+using VideoCommon::ImageFlagBits;
+using VideoCommon::ImageType;
+using VideoCommon::NUM_RT;
+using VideoCommon::SamplesLog2;
+using VideoCommon::SwizzleParameters;
+using VideoCore::Surface::BytesPerBlock;
+using VideoCore::Surface::IsPixelFormatASTC;
+using VideoCore::Surface::IsPixelFormatSRGB;
+using VideoCore::Surface::MaxPixelFormat;
 using VideoCore::Surface::PixelFormat;
-using VideoCore::Surface::SurfaceTarget;
 using VideoCore::Surface::SurfaceType;
 
-MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128));
-MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128));
-MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy",
-                    MP_RGB(128, 192, 128));
+struct CopyOrigin {
+    GLint level;
+    GLint x;
+    GLint y;
+    GLint z;
+};
 
-namespace {
+struct CopyRegion {
+    GLsizei width;
+    GLsizei height;
+    GLsizei depth;
+};
 
 struct FormatTuple {
     GLenum internal_format;
     GLenum format = GL_NONE;
     GLenum type = GL_NONE;
+    GLenum store_format = internal_format;
 };
 
-constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
+constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
     {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
     {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
     {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
     {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
     {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
     {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
     {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
     {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
     {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
     {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
     {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
     {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
     {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
     {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
     {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
     {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
     {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
     {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
     {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
     {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
     {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
     {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
     {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
     {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
     {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
     {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
     {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
     {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
     {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
     {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
     {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
     {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
     {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
     {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
     {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
     {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
     {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
     {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
     {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
     {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
     {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
     {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
     {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
     {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
     {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
     {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
     {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
     {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_RGBA8}, // A8B8G8R8_SRGB
     {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
     {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
     {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
     {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
     {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
     {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
     {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
     {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
     {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
     {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
     {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
-    {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
-    // Compressed sRGB formats
-    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
-    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
-    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
-    {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
-    {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
-    {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
-    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
-    {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
-
-    // Depth formats
-    {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
-    {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
-
-    // DepthStencil formats
-    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
-    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
+    {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, GL_RGBA8}, // B8G8R8A8_UNORM
+    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
+    {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
+    {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
+    {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
+    {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
+    {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
+    {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
+    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
+    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
     {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
      GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
 }};
 
+constexpr std::array ACCELERATED_FORMATS{
+    GL_RGBA32F,   GL_RGBA16F,   GL_RG32F,    GL_RG16F,    GL_R11F_G11F_B10F, GL_R32F,
+    GL_R16F,      GL_RGBA32UI,  GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI,      GL_RG32UI,
+    GL_RG16UI,    GL_RG8UI,     GL_R32UI,    GL_R16UI,    GL_R8UI,           GL_RGBA32I,
+    GL_RGBA16I,   GL_RGBA8I,    GL_RG32I,    GL_RG16I,    GL_RG8I,           GL_R32I,
+    GL_R16I,      GL_R8I,       GL_RGBA16,   GL_RGB10_A2, GL_RGBA8,          GL_RG16,
+    GL_RG8,       GL_R16,       GL_R8,       GL_RGBA16_SNORM, GL_RGBA8_SNORM, GL_RG16_SNORM,
+    GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM,
+};
+
 const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
-    ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
-    return tex_format_tuples[static_cast<std::size_t>(pixel_format)];
+    ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
+    return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
 }
 
-GLenum GetTextureTarget(const SurfaceTarget& target) {
-    switch (target) {
-    case SurfaceTarget::TextureBuffer:
+GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
+    switch (info.type) {
+    case ImageType::e1D:
+        return GL_TEXTURE_1D_ARRAY;
+    case ImageType::e2D:
+        if (info.num_samples > 1) {
+            return GL_TEXTURE_2D_MULTISAMPLE_ARRAY;
+        }
+        return GL_TEXTURE_2D_ARRAY;
+    case ImageType::e3D:
+        return GL_TEXTURE_3D;
+    case ImageType::Linear:
+        return GL_TEXTURE_2D_ARRAY;
+    case ImageType::Buffer:
         return GL_TEXTURE_BUFFER;
-    case SurfaceTarget::Texture1D:
+    }
+    UNREACHABLE_MSG("Invalid image type={}", info.type);
+    return GL_NONE;
+}
+
+GLenum ImageTarget(ImageViewType type, int num_samples = 1) {
+    const bool is_multisampled = num_samples > 1;
+    switch (type) {
+    case ImageViewType::e1D:
         return GL_TEXTURE_1D;
-    case SurfaceTarget::Texture2D:
-        return GL_TEXTURE_2D;
-    case SurfaceTarget::Texture3D:
+    case ImageViewType::e2D:
+        return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
+    case ImageViewType::Cube:
+        return GL_TEXTURE_CUBE_MAP;
+    case ImageViewType::e3D:
         return GL_TEXTURE_3D;
-    case SurfaceTarget::Texture1DArray:
+    case ImageViewType::e1DArray:
         return GL_TEXTURE_1D_ARRAY;
-    case SurfaceTarget::Texture2DArray:
-        return GL_TEXTURE_2D_ARRAY;
-    case SurfaceTarget::TextureCubemap:
-        return GL_TEXTURE_CUBE_MAP;
-    case SurfaceTarget::TextureCubeArray:
+    case ImageViewType::e2DArray:
+        return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY;
+    case ImageViewType::CubeArray:
         return GL_TEXTURE_CUBE_MAP_ARRAY;
+    case ImageViewType::Rect:
+        return GL_TEXTURE_RECTANGLE;
+    case ImageViewType::Buffer:
+        return GL_TEXTURE_BUFFER;
     }
-    UNREACHABLE();
-    return {};
+    UNREACHABLE_MSG("Invalid image view type={}", type);
+    return GL_NONE;
 }
 
-GLint GetSwizzleSource(SwizzleSource source) {
+GLenum TextureMode(PixelFormat format, bool is_first) {
+    switch (format) {
+    case PixelFormat::D24_UNORM_S8_UINT:
+    case PixelFormat::D32_FLOAT_S8_UINT:
+        return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX;
+    case PixelFormat::S8_UINT_D24_UNORM:
+        return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
+    default:
+        UNREACHABLE();
+        return GL_DEPTH_COMPONENT;
+    }
+}
+
+GLint Swizzle(SwizzleSource source) {
     switch (source) {
     case SwizzleSource::Zero:
         return GL_ZERO;
@@ -184,530 +249,813 @@ GLint GetSwizzleSource(SwizzleSource source) {
184 case SwizzleSource::OneFloat: 249 case SwizzleSource::OneFloat:
185 return GL_ONE; 250 return GL_ONE;
186 } 251 }
187 UNREACHABLE(); 252 UNREACHABLE_MSG("Invalid swizzle source={}", source);
188 return GL_NONE; 253 return GL_NONE;
189} 254}
190 255
191GLenum GetComponent(PixelFormat format, bool is_first) { 256GLenum AttachmentType(PixelFormat format) {
192 switch (format) { 257 switch (const SurfaceType type = VideoCore::Surface::GetFormatType(format); type) {
193 case PixelFormat::D24_UNORM_S8_UINT: 258 case SurfaceType::Depth:
194 case PixelFormat::D32_FLOAT_S8_UINT: 259 return GL_DEPTH_ATTACHMENT;
195 return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; 260 case SurfaceType::DepthStencil:
196 case PixelFormat::S8_UINT_D24_UNORM: 261 return GL_DEPTH_STENCIL_ATTACHMENT;
197 return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT;
198 default: 262 default:
199 UNREACHABLE(); 263 UNIMPLEMENTED_MSG("Unimplemented type={}", type);
200 return GL_DEPTH_COMPONENT; 264 return GL_NONE;
201 } 265 }
202} 266}
203 267
204void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { 268[[nodiscard]] bool IsConverted(const Device& device, PixelFormat format, ImageType type) {
205 if (params.IsBuffer()) { 269 if (!device.HasASTC() && IsPixelFormatASTC(format)) {
206 return; 270 return true;
207 } 271 }
208 glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 272 switch (format) {
209 glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 273 case PixelFormat::BC4_UNORM:
210 glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 274 case PixelFormat::BC5_UNORM:
211 glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); 275 return type == ImageType::e3D;
212 glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, static_cast<GLint>(params.num_levels - 1)); 276 default:
213 if (params.num_levels == 1) { 277 break;
214 glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f);
215 } 278 }
279 return false;
216} 280}
217 281
218OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format, 282[[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) {
219 OGLBuffer& texture_buffer) { 283 switch (value) {
220 OGLTexture texture; 284 case SwizzleSource::G:
221 texture.Create(target); 285 return SwizzleSource::R;
286 default:
287 return value;
288 }
289}
222 290
223 switch (params.target) { 291void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4> swizzle) {
224 case SurfaceTarget::Texture1D: 292 switch (format) {
225 glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width); 293 case PixelFormat::D24_UNORM_S8_UINT:
226 break; 294 case PixelFormat::D32_FLOAT_S8_UINT:
227 case SurfaceTarget::TextureBuffer: 295 case PixelFormat::S8_UINT_D24_UNORM:
228 texture_buffer.Create(); 296 UNIMPLEMENTED_IF(swizzle[0] != SwizzleSource::R && swizzle[0] != SwizzleSource::G);
229 glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(), 297 glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
230 nullptr, GL_DYNAMIC_STORAGE_BIT); 298 TextureMode(format, swizzle[0] == SwizzleSource::R));
231 glTextureBuffer(texture.handle, internal_format, texture_buffer.handle); 299 std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
232 break; 300 break;
233 case SurfaceTarget::Texture2D: 301 default:
234 case SurfaceTarget::TextureCubemap:
235 glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width,
236 params.height);
237 break; 302 break;
238 case SurfaceTarget::Texture3D: 303 }
239 case SurfaceTarget::Texture2DArray: 304 std::array<GLint, 4> gl_swizzle;
240 case SurfaceTarget::TextureCubeArray: 305 std::ranges::transform(swizzle, gl_swizzle.begin(), Swizzle);
241 glTextureStorage3D(texture.handle, params.emulated_levels, internal_format, params.width, 306 glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
242 params.height, params.depth); 307}
308
309[[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime,
310 const VideoCommon::ImageInfo& info) {
311 // Disable accelerated uploads for now as they don't implement swizzled uploads
312 return false;
313 switch (info.type) {
314 case ImageType::e2D:
315 case ImageType::e3D:
316 case ImageType::Linear:
243 break; 317 break;
244 default: 318 default:
245 UNREACHABLE(); 319 return false;
320 }
321 const GLenum internal_format = GetFormatTuple(info.format).internal_format;
322 const auto& format_info = runtime.FormatInfo(info.type, internal_format);
323 if (format_info.is_compressed) {
324 return false;
325 }
326 if (std::ranges::find(ACCELERATED_FORMATS, internal_format) == ACCELERATED_FORMATS.end()) {
327 return false;
246 } 328 }
329 if (format_info.compatibility_by_size) {
330 return true;
331 }
332 const GLenum store_format = StoreFormat(BytesPerBlock(info.format));
333 const GLenum store_class = runtime.FormatInfo(info.type, store_format).compatibility_class;
334 return format_info.compatibility_class == store_class;
335}
247 336
248 ApplyTextureDefaults(params, texture.handle); 337[[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset,
338 VideoCommon::SubresourceLayers subresource, GLenum target) {
339 switch (target) {
340 case GL_TEXTURE_2D_ARRAY:
341 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
342 return CopyOrigin{
343 .level = static_cast<GLint>(subresource.base_level),
344 .x = static_cast<GLint>(offset.x),
345 .y = static_cast<GLint>(offset.y),
346 .z = static_cast<GLint>(subresource.base_layer),
347 };
348 case GL_TEXTURE_3D:
349 return CopyOrigin{
350 .level = static_cast<GLint>(subresource.base_level),
351 .x = static_cast<GLint>(offset.x),
352 .y = static_cast<GLint>(offset.y),
353 .z = static_cast<GLint>(offset.z),
354 };
355 default:
356 UNIMPLEMENTED_MSG("Unimplemented copy target={}", target);
357 return CopyOrigin{.level = 0, .x = 0, .y = 0, .z = 0};
358 }
359}
249 360
250 return texture; 361[[nodiscard]] CopyRegion MakeCopyRegion(VideoCommon::Extent3D extent,
362 VideoCommon::SubresourceLayers dst_subresource,
363 GLenum target) {
364 switch (target) {
365 case GL_TEXTURE_2D_ARRAY:
366 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
367 return CopyRegion{
368 .width = static_cast<GLsizei>(extent.width),
369 .height = static_cast<GLsizei>(extent.height),
370 .depth = static_cast<GLsizei>(dst_subresource.num_layers),
371 };
372 case GL_TEXTURE_3D:
373 return CopyRegion{
374 .width = static_cast<GLsizei>(extent.width),
375 .height = static_cast<GLsizei>(extent.height),
376 .depth = static_cast<GLsizei>(extent.depth),
377 };
378 default:
379 UNIMPLEMENTED_MSG("Unimplemented copy target={}", target);
380 return CopyRegion{.width = 0, .height = 0, .depth = 0};
381 }
251} 382}
252 383
253constexpr u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, 384void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
254 SwizzleSource w_source) { 385 if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) {
255 return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | 386 const GLuint texture = image_view->DefaultHandle();
256 (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); 387 glNamedFramebufferTexture(fbo, attachment, texture, 0);
388 return;
389 }
390 const GLuint texture = image_view->Handle(ImageViewType::e3D);
391 if (image_view->range.extent.layers > 1) {
392 // TODO: OpenGL doesn't support rendering to a fixed number of slices
393 glNamedFramebufferTexture(fbo, attachment, texture, 0);
394 } else {
395 const u32 slice = image_view->range.base.layer;
396 glNamedFramebufferTextureLayer(fbo, attachment, texture, 0, slice);
397 }
257} 398}
258 399
259} // Anonymous namespace 400} // Anonymous namespace
260 401
261CachedSurface::CachedSurface(const GPUVAddr gpu_addr_, const SurfaceParams& params_, 402ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_)
262 bool is_astc_supported_) 403 : span(map, size), sync{sync_}, handle{handle_} {}
263 : SurfaceBase<View>{gpu_addr_, params_, is_astc_supported_} {
264 if (is_converted) {
265 internal_format = params.srgb_conversion ? GL_SRGB8_ALPHA8 : GL_RGBA8;
266 format = GL_RGBA;
267 type = GL_UNSIGNED_BYTE;
268 } else {
269 const auto& tuple{GetFormatTuple(params.pixel_format)};
270 internal_format = tuple.internal_format;
271 format = tuple.format;
272 type = tuple.type;
273 is_compressed = params.IsCompressed();
274 }
275 target = GetTextureTarget(params.target);
276 texture = CreateTexture(params, target, internal_format, texture_buffer);
277 DecorateSurfaceName();
278 404
279 u32 num_layers = 1; 405ImageBufferMap::~ImageBufferMap() {
280 if (params.is_layered || params.target == SurfaceTarget::Texture3D) { 406 if (sync) {
281 num_layers = params.depth; 407 sync->Create();
282 } 408 }
283
284 main_view =
285 CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true);
286} 409}
287 410
288CachedSurface::~CachedSurface() = default; 411TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
412 StateTracker& state_tracker_)
413 : device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager) {
414 static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
415 for (size_t i = 0; i < TARGETS.size(); ++i) {
416 const GLenum target = TARGETS[i];
417 for (const FormatTuple& tuple : FORMAT_TABLE) {
418 const GLenum format = tuple.internal_format;
419 GLint compat_class;
420 GLint compat_type;
421 GLint is_compressed;
422 glGetInternalformativ(target, format, GL_IMAGE_COMPATIBILITY_CLASS, 1, &compat_class);
423 glGetInternalformativ(target, format, GL_IMAGE_FORMAT_COMPATIBILITY_TYPE, 1,
424 &compat_type);
425 glGetInternalformativ(target, format, GL_TEXTURE_COMPRESSED, 1, &is_compressed);
426 const FormatProperties properties{
427 .compatibility_class = static_cast<GLenum>(compat_class),
428 .compatibility_by_size = compat_type == GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE,
429 .is_compressed = is_compressed == GL_TRUE,
430 };
431 format_properties[i].emplace(format, properties);
432 }
433 }
434 null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY);
435 null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY);
436 null_image_3d.Create(GL_TEXTURE_3D);
437 null_image_rect.Create(GL_TEXTURE_RECTANGLE);
438 glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1);
439 glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6);
440 glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1);
441 glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1);
442
443 std::array<GLuint, 4> new_handles;
444 glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data());
445 null_image_view_1d.handle = new_handles[0];
446 null_image_view_2d.handle = new_handles[1];
447 null_image_view_2d_array.handle = new_handles[2];
448 null_image_view_cube.handle = new_handles[3];
449 glTextureView(null_image_view_1d.handle, GL_TEXTURE_1D, null_image_1d_array.handle, GL_R8, 0, 1,
450 0, 1);
451 glTextureView(null_image_view_2d.handle, GL_TEXTURE_2D, null_image_cube_array.handle, GL_R8, 0,
452 1, 0, 1);
453 glTextureView(null_image_view_2d_array.handle, GL_TEXTURE_2D_ARRAY,
454 null_image_cube_array.handle, GL_R8, 0, 1, 0, 1);
455 glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle,
456 GL_R8, 0, 1, 0, 6);
457 const std::array texture_handles{
458 null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
459 null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle,
460 null_image_view_2d_array.handle, null_image_view_cube.handle,
461 };
462 for (const GLuint handle : texture_handles) {
463 static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO};
464 glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data());
465 }
466 const auto set_view = [this](ImageViewType type, GLuint handle) {
467 if (device.HasDebuggingToolAttached()) {
468 const std::string name = fmt::format("NullImage {}", type);
469 glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
470 }
471 null_image_views[static_cast<size_t>(type)] = handle;
472 };
473 set_view(ImageViewType::e1D, null_image_view_1d.handle);
474 set_view(ImageViewType::e2D, null_image_view_2d.handle);
475 set_view(ImageViewType::Cube, null_image_view_cube.handle);
476 set_view(ImageViewType::e3D, null_image_3d.handle);
477 set_view(ImageViewType::e1DArray, null_image_1d_array.handle);
478 set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle);
479 set_view(ImageViewType::CubeArray, null_image_cube_array.handle);
480 set_view(ImageViewType::Rect, null_image_rect.handle);
481}
289 482
290void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { 483TextureCacheRuntime::~TextureCacheRuntime() = default;
291 MICROPROFILE_SCOPE(OpenGL_Texture_Download);
292 484
293 if (params.IsBuffer()) { 485void TextureCacheRuntime::Finish() {
294 glGetNamedBufferSubData(texture_buffer.handle, 0, 486 glFinish();
295 static_cast<GLsizeiptr>(params.GetHostSizeInBytes(false)), 487}
296 staging_buffer.data());
297 return;
298 }
299 488
300 SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); 489ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
490 return upload_buffers.RequestMap(size, true);
491}
301 492
302 for (u32 level = 0; level < params.emulated_levels; ++level) { 493ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) {
303 glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted))); 494 return download_buffers.RequestMap(size, false);
304 glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); 495}
305 const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);
306 496
307 u8* const mip_data = staging_buffer.data() + mip_offset; 497void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image,
308 const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level)); 498 std::span<const ImageCopy> copies) {
309 if (is_compressed) { 499 const GLuint dst_name = dst_image.Handle();
310 glGetCompressedTextureImage(texture.handle, level, size, mip_data); 500 const GLuint src_name = src_image.Handle();
311 } else { 501 const GLenum dst_target = ImageTarget(dst_image.info);
312 glGetTextureImage(texture.handle, level, format, type, size, mip_data); 502 const GLenum src_target = ImageTarget(src_image.info);
313 } 503 for (const ImageCopy& copy : copies) {
504 const auto src_origin = MakeCopyOrigin(copy.src_offset, copy.src_subresource, src_target);
505 const auto dst_origin = MakeCopyOrigin(copy.dst_offset, copy.dst_subresource, dst_target);
506 const auto region = MakeCopyRegion(copy.extent, copy.dst_subresource, dst_target);
507 glCopyImageSubData(src_name, src_target, src_origin.level, src_origin.x, src_origin.y,
508 src_origin.z, dst_name, dst_target, dst_origin.level, dst_origin.x,
509 dst_origin.y, dst_origin.z, region.width, region.height, region.depth);
314 } 510 }
315} 511}
316 512
317void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { 513bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) {
318 MICROPROFILE_SCOPE(OpenGL_Texture_Upload); 514 if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
319 SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); 515 return false;
320 for (u32 level = 0; level < params.emulated_levels; ++level) {
321 UploadTextureMipmap(level, staging_buffer);
322 } 516 }
517 return true;
323} 518}
324 519
325void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) { 520void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src,
326 glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted))); 521 std::span<const ImageCopy> copies) {
327 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); 522 if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
328 523 ASSERT(src.info.type == ImageType::e3D);
329 const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); 524 util_shaders.CopyBC4(dst, src, copies);
330 const u8* buffer{staging_buffer.data() + mip_offset};
331 if (is_compressed) {
332 const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))};
333 switch (params.target) {
334 case SurfaceTarget::Texture2D:
335 glCompressedTextureSubImage2D(texture.handle, level, 0, 0,
336 static_cast<GLsizei>(params.GetMipWidth(level)),
337 static_cast<GLsizei>(params.GetMipHeight(level)),
338 internal_format, image_size, buffer);
339 break;
340 case SurfaceTarget::Texture3D:
341 case SurfaceTarget::Texture2DArray:
342 case SurfaceTarget::TextureCubeArray:
343 glCompressedTextureSubImage3D(texture.handle, level, 0, 0, 0,
344 static_cast<GLsizei>(params.GetMipWidth(level)),
345 static_cast<GLsizei>(params.GetMipHeight(level)),
346 static_cast<GLsizei>(params.GetMipDepth(level)),
347 internal_format, image_size, buffer);
348 break;
349 case SurfaceTarget::TextureCubemap: {
350 const std::size_t host_layer_size{params.GetHostLayerSize(level)};
351 for (std::size_t face = 0; face < params.depth; ++face) {
352 glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
353 static_cast<GLsizei>(params.GetMipWidth(level)),
354 static_cast<GLsizei>(params.GetMipHeight(level)), 1,
355 internal_format,
356 static_cast<GLsizei>(host_layer_size), buffer);
357 buffer += host_layer_size;
358 }
359 break;
360 }
361 default:
362 UNREACHABLE();
363 }
364 } else { 525 } else {
365 switch (params.target) { 526 UNREACHABLE();
366 case SurfaceTarget::Texture1D:
367 glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type,
368 buffer);
369 break;
370 case SurfaceTarget::TextureBuffer:
371 ASSERT(level == 0);
372 glNamedBufferSubData(texture_buffer.handle, 0,
373 params.GetMipWidth(level) * params.GetBytesPerPixel(), buffer);
374 break;
375 case SurfaceTarget::Texture1DArray:
376 case SurfaceTarget::Texture2D:
377 glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level),
378 params.GetMipHeight(level), format, type, buffer);
379 break;
380 case SurfaceTarget::Texture3D:
381 case SurfaceTarget::Texture2DArray:
382 case SurfaceTarget::TextureCubeArray:
383 glTextureSubImage3D(
384 texture.handle, level, 0, 0, 0, static_cast<GLsizei>(params.GetMipWidth(level)),
385 static_cast<GLsizei>(params.GetMipHeight(level)),
386 static_cast<GLsizei>(params.GetMipDepth(level)), format, type, buffer);
387 break;
388 case SurfaceTarget::TextureCubemap:
389 for (std::size_t face = 0; face < params.depth; ++face) {
390 glTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
391 params.GetMipWidth(level), params.GetMipHeight(level), 1,
392 format, type, buffer);
393 buffer += params.GetHostLayerSize(level);
394 }
395 break;
396 default:
397 UNREACHABLE();
398 }
399 } 527 }
400} 528}
401 529
402void CachedSurface::DecorateSurfaceName() { 530void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
403 LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName()); 531 const std::array<Offset2D, 2>& dst_region,
404} 532 const std::array<Offset2D, 2>& src_region,
533 Tegra::Engines::Fermi2D::Filter filter,
534 Tegra::Engines::Fermi2D::Operation operation) {
535 state_tracker.NotifyScissor0();
536 state_tracker.NotifyRasterizeEnable();
537 state_tracker.NotifyFramebufferSRGB();
405 538
406void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix) { 539 ASSERT(dst->BufferBits() == src->BufferBits());
407 LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix); 540
541 glEnable(GL_FRAMEBUFFER_SRGB);
542 glDisable(GL_RASTERIZER_DISCARD);
543 glDisablei(GL_SCISSOR_TEST, 0);
544
545 const GLbitfield buffer_bits = dst->BufferBits();
546 const bool has_depth = (buffer_bits & ~GL_COLOR_BUFFER_BIT) != 0;
547 const bool is_linear = !has_depth && filter == Tegra::Engines::Fermi2D::Filter::Bilinear;
548 glBlitNamedFramebuffer(src->Handle(), dst->Handle(), src_region[0].x, src_region[0].y,
549 src_region[1].x, src_region[1].y, dst_region[0].x, dst_region[0].y,
550 dst_region[1].x, dst_region[1].y, buffer_bits,
551 is_linear ? GL_LINEAR : GL_NEAREST);
408} 552}
409 553
410View CachedSurface::CreateView(const ViewParams& view_key) { 554void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map,
411 return CreateViewInner(view_key, false); 555 size_t buffer_offset,
556 std::span<const SwizzleParameters> swizzles) {
557 switch (image.info.type) {
558 case ImageType::e2D:
559 return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles);
560 case ImageType::e3D:
561 return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles);
562 case ImageType::Linear:
563 return util_shaders.PitchUpload(image, map, buffer_offset, swizzles);
564 default:
565 UNREACHABLE();
566 break;
567 }
412} 568}
413 569
414View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_proxy) { 570void TextureCacheRuntime::InsertUploadMemoryBarrier() {
415 auto view = std::make_shared<CachedSurfaceView>(*this, view_key, is_proxy); 571 glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
416 views[view_key] = view;
417 if (!is_proxy)
418 view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++));
419 return view;
420} 572}
421 573
422CachedSurfaceView::CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_, 574FormatProperties TextureCacheRuntime::FormatInfo(ImageType type, GLenum internal_format) const {
423 bool is_proxy_) 575 switch (type) {
424 : ViewBase{params_}, surface{surface_}, format{surface_.internal_format}, 576 case ImageType::e1D:
425 target{GetTextureTarget(params_.target)}, is_proxy{is_proxy_} { 577 return format_properties[0].at(internal_format);
426 if (!is_proxy_) { 578 case ImageType::e2D:
427 main_view = CreateTextureView(); 579 case ImageType::Linear:
580 return format_properties[1].at(internal_format);
581 case ImageType::e3D:
582 return format_properties[2].at(internal_format);
583 default:
584 UNREACHABLE();
585 return FormatProperties{};
428 } 586 }
429} 587}
430 588
431CachedSurfaceView::~CachedSurfaceView() = default; 589TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
590 : storage_flags{storage_flags_}, map_flags{map_flags_} {}
432 591
433void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const { 592TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default;
434 ASSERT(params.num_levels == 1);
435 593
436 if (params.target == SurfaceTarget::Texture3D) { 594ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size,
437 if (params.num_layers > 1) { 595 bool insert_fence) {
438 ASSERT(params.base_layer == 0); 596 const size_t index = RequestBuffer(requested_size);
439 glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level); 597 OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
440 } else { 598 return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync);
441 glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle, 599}
442 params.base_level, params.base_layer); 600
443 } 601size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
444 return; 602 if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
603 return *index;
445 } 604 }
446 605
447 if (params.num_layers > 1) { 606 OGLBuffer& buffer = buffers.emplace_back();
448 UNIMPLEMENTED_IF(params.base_layer != 0); 607 buffer.Create();
449 glFramebufferTexture(fb_target, attachment, GetTexture(), 0); 608 glNamedBufferStorage(buffer.handle, requested_size, nullptr,
450 return; 609 storage_flags | GL_MAP_PERSISTENT_BIT);
610 maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size,
611 map_flags | GL_MAP_PERSISTENT_BIT)));
612
613 syncs.emplace_back();
614 sizes.push_back(requested_size);
615
616 ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
617 maps.size() == sizes.size());
618
619 return buffers.size() - 1;
620}
621
622std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) {
623 size_t smallest_buffer = std::numeric_limits<size_t>::max();
624 std::optional<size_t> found;
625 const size_t num_buffers = sizes.size();
626 for (size_t index = 0; index < num_buffers; ++index) {
627 const size_t buffer_size = sizes[index];
628 if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
629 continue;
630 }
631 if (syncs[index].handle != 0) {
632 GLint status;
633 glGetSynciv(syncs[index].handle, GL_SYNC_STATUS, 1, nullptr, &status);
634 if (status != GL_SIGNALED) {
635 continue;
636 }
637 syncs[index].Release();
638 }
639 smallest_buffer = buffer_size;
640 found = index;
451 } 641 }
642 return found;
643}
452 644
453 const GLenum view_target = surface.GetTarget(); 645Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
454 const GLuint texture = surface.GetTexture(); 646 VAddr cpu_addr_)
455 switch (surface.GetSurfaceParams().target) { 647 : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_) {
456 case SurfaceTarget::Texture1D: 648 if (CanBeAccelerated(runtime, info)) {
457 glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level); 649 flags |= ImageFlagBits::AcceleratedUpload;
650 }
651 if (IsConverted(runtime.device, info.format, info.type)) {
652 flags |= ImageFlagBits::Converted;
653 gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
654 gl_store_format = GL_RGBA8;
655 gl_format = GL_RGBA;
656 gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
657 } else {
658 const auto& tuple = GetFormatTuple(info.format);
659 gl_internal_format = tuple.internal_format;
660 gl_store_format = tuple.store_format;
661 gl_format = tuple.format;
662 gl_type = tuple.type;
663 }
664 const GLenum target = ImageTarget(info);
665 const GLsizei width = info.size.width;
666 const GLsizei height = info.size.height;
667 const GLsizei depth = info.size.depth;
668 const int max_host_mip_levels = std::bit_width(info.size.width);
669 const GLsizei num_levels = std::min(info.resources.levels, max_host_mip_levels);
670 const GLsizei num_layers = info.resources.layers;
671 const GLsizei num_samples = info.num_samples;
672
673 GLuint handle = 0;
674 if (target != GL_TEXTURE_BUFFER) {
675 texture.Create(target);
676 handle = texture.handle;
677 }
678 switch (target) {
679 case GL_TEXTURE_1D_ARRAY:
680 glTextureStorage2D(handle, num_levels, gl_store_format, width, num_layers);
458 break; 681 break;
459 case SurfaceTarget::Texture2D: 682 case GL_TEXTURE_2D_ARRAY:
460 glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level); 683 glTextureStorage3D(handle, num_levels, gl_store_format, width, height, num_layers);
461 break; 684 break;
462 case SurfaceTarget::Texture1DArray: 685 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: {
463 case SurfaceTarget::Texture2DArray: 686 // TODO: Where should 'fixedsamplelocations' come from?
464 case SurfaceTarget::TextureCubemap: 687 const auto [samples_x, samples_y] = SamplesLog2(info.num_samples);
465 case SurfaceTarget::TextureCubeArray: 688 glTextureStorage3DMultisample(handle, num_samples, gl_store_format, width >> samples_x,
466 glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level, 689 height >> samples_y, num_layers, GL_FALSE);
467 params.base_layer); 690 break;
691 }
692 case GL_TEXTURE_RECTANGLE:
693 glTextureStorage2D(handle, num_levels, gl_store_format, width, height);
694 break;
695 case GL_TEXTURE_3D:
696 glTextureStorage3D(handle, num_levels, gl_store_format, width, height, depth);
697 break;
698 case GL_TEXTURE_BUFFER:
699 buffer.Create();
700 glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0);
 701         break;
 702     default:
 703         UNREACHABLE_MSG("Invalid target=0x{:x}", target);
704 break;
705 }
706 if (runtime.device.HasDebuggingToolAttached()) {
707 const std::string name = VideoCommon::Name(*this);
708 glObjectLabel(target == GL_TEXTURE_BUFFER ? GL_BUFFER : GL_TEXTURE, handle,
709 static_cast<GLsizei>(name.size()), name.data());
 710     }
 711 }
 473
 474 GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_source,
 475                                      SwizzleSource z_source, SwizzleSource w_source) {
 476     if (GetSurfaceParams().IsBuffer()) {
 477         return GetTexture();
 478     }
 479     const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
 480     if (current_swizzle == new_swizzle) {
 481         return current_view;
 482     }
 483     current_swizzle = new_swizzle;
 484
 485     const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle);
 486     OGLTextureView& view = entry->second;
 487     if (!is_cache_miss) {
 488         current_view = view.handle;
 489         return view.handle;
 490     }
 491     view = CreateTextureView();
 492     current_view = view.handle;
 493
 494     std::array swizzle{x_source, y_source, z_source, w_source};
 495
 496     switch (const PixelFormat pixel_format = GetSurfaceParams().pixel_format) {
 497     case PixelFormat::D24_UNORM_S8_UINT:
 498     case PixelFormat::D32_FLOAT_S8_UINT:
 499     case PixelFormat::S8_UINT_D24_UNORM:
 500         UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
 501         glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
 502                             GetComponent(pixel_format, x_source == SwizzleSource::R));
 503
 504         // Make sure we sample the first component
 505         std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(), [](SwizzleSource value) {
 506             return value == SwizzleSource::G ? SwizzleSource::R : value;
 507         });
 508         [[fallthrough]];
 509     default: {
 510         const std::array gl_swizzle = {GetSwizzleSource(swizzle[0]), GetSwizzleSource(swizzle[1]),
 511                                        GetSwizzleSource(swizzle[2]), GetSwizzleSource(swizzle[3])};
 512         glTextureParameteriv(view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
 513         break;
 514     }
 515     }
 516     return view.handle;
 517 }
 712
 713 void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
 714                          std::span<const VideoCommon::BufferImageCopy> copies) {
 715     glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle());
 716     glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes);
 717
 718     glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
 719
 720     u32 current_row_length = std::numeric_limits<u32>::max();
 721     u32 current_image_height = std::numeric_limits<u32>::max();
 722
 723     for (const VideoCommon::BufferImageCopy& copy : copies) {
 724         if (current_row_length != copy.buffer_row_length) {
 725             current_row_length = copy.buffer_row_length;
 726             glPixelStorei(GL_UNPACK_ROW_LENGTH, current_row_length);
 727         }
 728         if (current_image_height != copy.buffer_image_height) {
 729             current_image_height = copy.buffer_image_height;
 730             glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height);
 731         }
 732         CopyBufferToImage(copy, buffer_offset);
 733     }
 734 }
 518
 519 OGLTextureView CachedSurfaceView::CreateTextureView() const {
 520     OGLTextureView texture_view;
 521     texture_view.Create();
 522
 523     if (target == GL_TEXTURE_3D) {
 524         glTextureView(texture_view.handle, target, surface.texture.handle, format,
 525                       params.base_level, params.num_levels, 0, 1);
 526     } else {
 527         glTextureView(texture_view.handle, target, surface.texture.handle, format,
 528                       params.base_level, params.num_levels, params.base_layer, params.num_layers);
 529     }
 530     ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle);
 531
 532     return texture_view;
 533 }
 735
 736 void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
 737                          std::span<const VideoCommon::BufferCopy> copies) {
 738     for (const VideoCommon::BufferCopy& copy : copies) {
 739         glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset,
 740                                  copy.dst_offset, copy.size);
 741     }
 742 }
 534
 535 TextureCacheOpenGL::TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_,
 536                                        Tegra::Engines::Maxwell3D& maxwell3d_,
 537                                        Tegra::MemoryManager& gpu_memory_, const Device& device_,
 538                                        StateTracker& state_tracker_)
 539     : TextureCacheBase{rasterizer_, maxwell3d_, gpu_memory_, device_.HasASTC()},
 540       state_tracker{state_tracker_} {
 541     src_framebuffer.Create();
 542     dst_framebuffer.Create();
 543 }
 544
 545 TextureCacheOpenGL::~TextureCacheOpenGL() = default;
 546
 547 Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
 548     return std::make_shared<CachedSurface>(gpu_addr, params, is_astc_supported);
 549 }
 550
 551 void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface,
 552                                    const VideoCommon::CopyParams& copy_params) {
 553     const auto& src_params = src_surface->GetSurfaceParams();
 554     const auto& dst_params = dst_surface->GetSurfaceParams();
 555     if (src_params.type != dst_params.type) {
 556         // A fallback is needed
 557         return;
 558     }
 559     const auto src_handle = src_surface->GetTexture();
 560     const auto src_target = src_surface->GetTarget();
 561     const auto dst_handle = dst_surface->GetTexture();
 562     const auto dst_target = dst_surface->GetTarget();
 563     glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x,
 564                        copy_params.source_y, copy_params.source_z, dst_handle, dst_target,
 565                        copy_params.dest_level, copy_params.dest_x, copy_params.dest_y,
 566                        copy_params.dest_z, copy_params.width, copy_params.height,
 567                        copy_params.depth);
 568 }
 743
 744 void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
 745                            std::span<const VideoCommon::BufferImageCopy> copies) {
 746     glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
 747
 748     glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle());
 749     glPixelStorei(GL_PACK_ALIGNMENT, 1);
 750
 751     u32 current_row_length = std::numeric_limits<u32>::max();
 752     u32 current_image_height = std::numeric_limits<u32>::max();
 753
 754     for (const VideoCommon::BufferImageCopy& copy : copies) {
 755         if (current_row_length != copy.buffer_row_length) {
 756             current_row_length = copy.buffer_row_length;
 757             glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length);
 758         }
 759         if (current_image_height != copy.buffer_image_height) {
 760             current_image_height = copy.buffer_image_height;
 761             glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height);
 762         }
 763         CopyImageToBuffer(copy, buffer_offset);
 764     }
 765 }
 569
 570 void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
 571                                    const Tegra::Engines::Fermi2D::Config& copy_config) {
 572     const auto& src_params{src_view->GetSurfaceParams()};
 573     const auto& dst_params{dst_view->GetSurfaceParams()};
 574     UNIMPLEMENTED_IF(src_params.depth != 1);
 575     UNIMPLEMENTED_IF(dst_params.depth != 1);
 576
 577     state_tracker.NotifyScissor0();
 578     state_tracker.NotifyFramebuffer();
 579     state_tracker.NotifyRasterizeEnable();
 580     state_tracker.NotifyFramebufferSRGB();
 581
 582     if (dst_params.srgb_conversion) {
 583         glEnable(GL_FRAMEBUFFER_SRGB);
 584     } else {
 585         glDisable(GL_FRAMEBUFFER_SRGB);
 586     }
 587     glDisable(GL_RASTERIZER_DISCARD);
 588     glDisablei(GL_SCISSOR_TEST, 0);
 589
 590     glBindFramebuffer(GL_READ_FRAMEBUFFER, src_framebuffer.handle);
 591     glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer.handle);
 592
 593     GLenum buffers = 0;
 594     if (src_params.type == SurfaceType::ColorTexture) {
 595         src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER);
 596         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
 597                                0);
 598
 599         dst_view->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
 600         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
 601                                0);
 602
 603         buffers = GL_COLOR_BUFFER_BIT;
 604     } else if (src_params.type == SurfaceType::Depth) {
 605         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
 606         src_view->Attach(GL_DEPTH_ATTACHMENT, GL_READ_FRAMEBUFFER);
 607         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
 608
 609         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
 610         dst_view->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
 611         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
 612
 613         buffers = GL_DEPTH_BUFFER_BIT;
 614     } else if (src_params.type == SurfaceType::DepthStencil) {
 615         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
 616         src_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_READ_FRAMEBUFFER);
 617
 618         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
 619         dst_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
 620
 621         buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
 622     }
 623
 624     const Common::Rectangle<u32>& src_rect = copy_config.src_rect;
 625     const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect;
 626     const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
 627
 628     glBlitFramebuffer(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.top),
 629                       static_cast<GLint>(src_rect.right), static_cast<GLint>(src_rect.bottom),
 630                       static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.top),
 631                       static_cast<GLint>(dst_rect.right), static_cast<GLint>(dst_rect.bottom),
 632                       buffers,
 633                       is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST);
 634 }
 766
 767 void Image::CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) {
 768     // Compressed formats don't have a pixel format or type
 769     const bool is_compressed = gl_format == GL_NONE;
 770     const void* const offset = reinterpret_cast<const void*>(copy.buffer_offset + buffer_offset);
 771
 772     switch (info.type) {
 773     case ImageType::e1D:
 774         if (is_compressed) {
 775             glCompressedTextureSubImage2D(texture.handle, copy.image_subresource.base_level,
 776                                           copy.image_offset.x, copy.image_subresource.base_layer,
777 copy.image_extent.width,
778 copy.image_subresource.num_layers, gl_internal_format,
779 static_cast<GLsizei>(copy.buffer_size), offset);
780 } else {
781 glTextureSubImage2D(texture.handle, copy.image_subresource.base_level,
782 copy.image_offset.x, copy.image_subresource.base_layer,
783 copy.image_extent.width, copy.image_subresource.num_layers,
784 gl_format, gl_type, offset);
785 }
786 break;
787 case ImageType::e2D:
788 case ImageType::Linear:
789 if (is_compressed) {
790 glCompressedTextureSubImage3D(
791 texture.handle, copy.image_subresource.base_level, copy.image_offset.x,
792 copy.image_offset.y, copy.image_subresource.base_layer, copy.image_extent.width,
793 copy.image_extent.height, copy.image_subresource.num_layers, gl_internal_format,
794 static_cast<GLsizei>(copy.buffer_size), offset);
795 } else {
796 glTextureSubImage3D(texture.handle, copy.image_subresource.base_level,
797 copy.image_offset.x, copy.image_offset.y,
798 copy.image_subresource.base_layer, copy.image_extent.width,
799 copy.image_extent.height, copy.image_subresource.num_layers,
800 gl_format, gl_type, offset);
801 }
802 break;
803 case ImageType::e3D:
804 if (is_compressed) {
805 glCompressedTextureSubImage3D(
806 texture.handle, copy.image_subresource.base_level, copy.image_offset.x,
807 copy.image_offset.y, copy.image_offset.z, copy.image_extent.width,
808 copy.image_extent.height, copy.image_extent.depth, gl_internal_format,
809 static_cast<GLsizei>(copy.buffer_size), offset);
810 } else {
811 glTextureSubImage3D(texture.handle, copy.image_subresource.base_level,
812 copy.image_offset.x, copy.image_offset.y, copy.image_offset.z,
813 copy.image_extent.width, copy.image_extent.height,
814 copy.image_extent.depth, gl_format, gl_type, offset);
815 }
816 break;
817 default:
818 UNREACHABLE();
 819     }
 820 }
 821
 822 void Image::CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset) {
 823     const GLint x_offset = copy.image_offset.x;
 824     const GLsizei width = copy.image_extent.width;
 825
 826     const GLint level = copy.image_subresource.base_level;
 827     const GLsizei buffer_size = static_cast<GLsizei>(copy.buffer_size);
 828     void* const offset = reinterpret_cast<void*>(copy.buffer_offset + buffer_offset);
 829
 830     GLint y_offset = 0;
 831     GLint z_offset = 0;
 832     GLsizei height = 1;
 833     GLsizei depth = 1;
 834
 835     switch (info.type) {
 836     case ImageType::e1D:
 837         y_offset = copy.image_subresource.base_layer;
 838         height = copy.image_subresource.num_layers;
 839         break;
 840     case ImageType::e2D:
 841     case ImageType::Linear:
 842         y_offset = copy.image_offset.y;
 843         z_offset = copy.image_subresource.base_layer;
 844         height = copy.image_extent.height;
 845         depth = copy.image_subresource.num_layers;
 846         break;
 847     case ImageType::e3D:
 848         y_offset = copy.image_offset.y;
 849         z_offset = copy.image_offset.z;
 850         height = copy.image_extent.height;
 851         depth = copy.image_extent.depth;
 852         break;
 853     default:
 854         UNREACHABLE();
 855     }
 856     // Compressed formats don't have a pixel format or type
 857     const bool is_compressed = gl_format == GL_NONE;
 858     if (is_compressed) {
 859         glGetCompressedTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width,
 860                                        height, depth, buffer_size, offset);
 861     } else {
 862         glGetTextureSubImage(texture.handle, level, x_offset, y_offset, z_offset, width, height,
 863                              depth, gl_format, gl_type, buffer_size, offset);
 864     }
 865 }
 635
 636 void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) {
 637     MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy);
 638     const auto& src_params = src_surface->GetSurfaceParams();
 639     const auto& dst_params = dst_surface->GetSurfaceParams();
 640     UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);
 866
 867 ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
 868                      ImageId image_id_, Image& image)
 869     : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} {
 870     const Device& device = runtime.device;
 871     if (True(image.flags & ImageFlagBits::Converted)) {
872 internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
873 } else {
874 internal_format = GetFormatTuple(format).internal_format;
875 }
876 VideoCommon::SubresourceRange flatten_range = info.range;
877 std::array<GLuint, 2> handles;
878 stored_views.reserve(2);
 641
 642     const auto source_format = GetFormatTuple(src_params.pixel_format);
 643     const auto dest_format = GetFormatTuple(dst_params.pixel_format);
 879
 880     switch (info.type) {
 881     case ImageViewType::e1DArray:
882 flatten_range.extent.layers = 1;
883 [[fallthrough]];
884 case ImageViewType::e1D:
885 glGenTextures(2, handles.data());
886 SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range);
887 SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range);
888 break;
889 case ImageViewType::e2DArray:
890 flatten_range.extent.layers = 1;
891 [[fallthrough]];
892 case ImageViewType::e2D:
893 if (True(flags & VideoCommon::ImageViewFlagBits::Slice)) {
894 // 2D and 2D array views on a 3D textures are used exclusively for render targets
895 ASSERT(info.range.extent.levels == 1);
896 const VideoCommon::SubresourceRange slice_range{
897 .base = {.level = info.range.base.level, .layer = 0},
898 .extent = {.levels = 1, .layers = 1},
899 };
900 glGenTextures(1, handles.data());
901 SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range);
902 break;
903 }
904 glGenTextures(2, handles.data());
905 SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range);
906 SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range);
907 break;
908 case ImageViewType::e3D:
909 glGenTextures(1, handles.data());
910 SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range);
911 break;
912 case ImageViewType::CubeArray:
913 flatten_range.extent.layers = 6;
914 [[fallthrough]];
915 case ImageViewType::Cube:
916 glGenTextures(2, handles.data());
917 SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range);
918 SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range);
919 break;
920 case ImageViewType::Rect:
921 glGenTextures(1, handles.data());
922 SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range);
923 break;
924 case ImageViewType::Buffer:
925 glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data());
926 SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range);
927 break;
928 }
929 default_handle = Handle(info.type);
 930 }
 644
 645     const std::size_t source_size = src_surface->GetHostSizeInBytes();
 646     const std::size_t dest_size = dst_surface->GetHostSizeInBytes();
 647
 648     const std::size_t buffer_size = std::max(source_size, dest_size);
 931
 932 ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params)
 933     : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
 934
 935 void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type,
936 GLuint handle, const VideoCommon::ImageViewInfo& info,
937 VideoCommon::SubresourceRange view_range) {
938 if (info.type == ImageViewType::Buffer) {
939 // TODO: Take offset from buffer cache
940 glTextureBufferRange(handle, internal_format, image.buffer.handle, 0,
941 image.guest_size_bytes);
942 } else {
943 const GLuint parent = image.texture.handle;
944 const GLenum target = ImageTarget(view_type, image.info.num_samples);
945 glTextureView(handle, target, parent, internal_format, view_range.base.level,
946 view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
947 if (!info.IsRenderTarget()) {
948 ApplySwizzle(handle, format, info.Swizzle());
949 }
950 }
951 if (device.HasDebuggingToolAttached()) {
952 const std::string name = VideoCommon::Name(*this, view_type);
953 glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
954 }
955 stored_views.emplace_back().handle = handle;
956 views[static_cast<size_t>(view_type)] = handle;
957}
 649
 650     GLuint copy_pbo_handle = FetchPBO(buffer_size);
 958
 959 Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {
960 const GLenum compare_mode = config.depth_compare_enabled ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE;
961 const GLenum compare_func = MaxwellToGL::DepthCompareFunc(config.depth_compare_func);
962 const GLenum mag = MaxwellToGL::TextureFilterMode(config.mag_filter, TextureMipmapFilter::None);
963 const GLenum min = MaxwellToGL::TextureFilterMode(config.min_filter, config.mipmap_filter);
964 const GLenum reduction_filter = MaxwellToGL::ReductionFilter(config.reduction_filter);
965 const GLint seamless = config.cubemap_interface_filtering ? GL_TRUE : GL_FALSE;
966
967 UNIMPLEMENTED_IF(config.cubemap_anisotropy != 1);
968 UNIMPLEMENTED_IF(config.float_coord_normalization != 0);
969
970 sampler.Create();
971 const GLuint handle = sampler.handle;
972 glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(config.wrap_u));
973 glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(config.wrap_v));
974 glSamplerParameteri(handle, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(config.wrap_p));
975 glSamplerParameteri(handle, GL_TEXTURE_COMPARE_MODE, compare_mode);
976 glSamplerParameteri(handle, GL_TEXTURE_COMPARE_FUNC, compare_func);
977 glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER, mag);
978 glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER, min);
979 glSamplerParameterf(handle, GL_TEXTURE_LOD_BIAS, config.LodBias());
980 glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, config.MinLod());
981 glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, config.MaxLod());
982 glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data());
983
984 if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) {
985 glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, config.MaxAnisotropy());
986 } else {
987 LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required");
988 }
989 if (GLAD_GL_ARB_texture_filter_minmax || GLAD_GL_EXT_texture_filter_minmax) {
990 glSamplerParameteri(handle, GL_TEXTURE_REDUCTION_MODE_ARB, reduction_filter);
991 } else if (reduction_filter != GL_WEIGHTED_AVERAGE_ARB) {
992 LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_minmax is required");
993 }
994 if (GLAD_GL_ARB_seamless_cubemap_per_texture || GLAD_GL_AMD_seamless_cubemap_per_texture) {
995 glSamplerParameteri(handle, GL_TEXTURE_CUBE_MAP_SEAMLESS, seamless);
996 } else if (seamless == GL_FALSE) {
997 // We default to false because it's more common
998 LOG_WARNING(Render_OpenGL, "GL_ARB_seamless_cubemap_per_texture is required");
999 }
1000}
 651
 652     glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
1001
1002 Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
1003 ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
1004 // Bind to READ_FRAMEBUFFER to stop Nvidia's driver from creating an EXT_framebuffer instead of
1005 // a core framebuffer. EXT framebuffer attachments have to match in size and can be shared
1006 // across contexts. yuzu doesn't share framebuffers across contexts and we need attachments with
 1007     // mismatching size, which is why core framebuffers are preferred.
1008 GLuint handle;
1009 glGenFramebuffers(1, &handle);
1010 glBindFramebuffer(GL_READ_FRAMEBUFFER, handle);
1011
1012 GLsizei num_buffers = 0;
1013 std::array<GLenum, NUM_RT> gl_draw_buffers;
1014 gl_draw_buffers.fill(GL_NONE);
1015
1016 for (size_t index = 0; index < color_buffers.size(); ++index) {
1017 const ImageView* const image_view = color_buffers[index];
1018 if (!image_view) {
1019 continue;
1020 }
1021 buffer_bits |= GL_COLOR_BUFFER_BIT;
1022 gl_draw_buffers[index] = GL_COLOR_ATTACHMENT0 + key.draw_buffers[index];
1023 num_buffers = static_cast<GLsizei>(index + 1);
 653
 654     if (src_surface->IsCompressed()) {
 655         glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size),
 656                                     nullptr);
 657     } else {
 658         glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type,
 659                           static_cast<GLsizei>(source_size), nullptr);
 660     }
1025         const GLenum attachment = static_cast<GLenum>(GL_COLOR_ATTACHMENT0 + index);
1026         AttachTexture(handle, attachment, image_view);
1027     }
 661     glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
 662
 663     glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle);
1028
1029     if (const ImageView* const image_view = depth_buffer; image_view) {
1030 if (GetFormatType(image_view->format) == SurfaceType::DepthStencil) {
1031 buffer_bits |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
1032 } else {
1033 buffer_bits |= GL_DEPTH_BUFFER_BIT;
1034 }
1035 const GLenum attachment = AttachmentType(image_view->format);
1036 AttachTexture(handle, attachment, image_view);
1037 }
 664
 665     const GLsizei width = static_cast<GLsizei>(dst_params.width);
 666     const GLsizei height = static_cast<GLsizei>(dst_params.height);
 667     const GLsizei depth = static_cast<GLsizei>(dst_params.depth);
 668     if (dst_surface->IsCompressed()) {
 669         LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!");
 670         UNREACHABLE();
 671     } else {
 672         switch (dst_params.target) {
 673         case SurfaceTarget::Texture1D:
 674             glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format,
 675                                 dest_format.type, nullptr);
 676             break;
 677         case SurfaceTarget::Texture2D:
 678             glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height,
 679                                 dest_format.format, dest_format.type, nullptr);
 680             break;
 681         case SurfaceTarget::Texture3D:
 682         case SurfaceTarget::Texture2DArray:
 683         case SurfaceTarget::TextureCubeArray:
 684             glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
 685                                 dest_format.format, dest_format.type, nullptr);
 686             break;
 687         case SurfaceTarget::TextureCubemap:
 688             glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
 689                                 dest_format.format, dest_format.type, nullptr);
 690             break;
 691         default:
 692             LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", dst_params.target);
 693             UNREACHABLE();
 694         }
 695     }
1038
1039     if (num_buffers > 1) {
1040         glNamedFramebufferDrawBuffers(handle, num_buffers, gl_draw_buffers.data());
1041     } else if (num_buffers > 0) {
1042         glNamedFramebufferDrawBuffer(handle, gl_draw_buffers[0]);
1043     } else {
1044         glNamedFramebufferDrawBuffer(handle, GL_NONE);
1045     }
 696     glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
 697
 698     glTextureBarrier();
 699 }
 700
 701 GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) {
 702     ASSERT_OR_EXECUTE(buffer_size > 0, { return 0; });
 703     const u32 l2 = Common::Log2Ceil64(static_cast<u64>(buffer_size));
 704     OGLBuffer& cp = copy_pbo_cache[l2];
 705     if (cp.handle == 0) {
 706         const std::size_t ceil_size = 1ULL << l2;
 707         cp.Create();
 708         cp.MakeStreamCopy(ceil_size);
 709     }
 710     return cp.handle;
 711 }
 712
 713 } // namespace OpenGL
1046
1047     glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_WIDTH, key.size.width);
1048     glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_HEIGHT, key.size.height);
1049     // TODO
1050     // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_LAYERS, ...);
1051     // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_SAMPLES, ...);
1052     // glNamedFramebufferParameteri(handle, GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS, ...);
1053
1054     if (runtime.device.HasDebuggingToolAttached()) {
1055         const std::string name = VideoCommon::Name(key);
1056         glObjectLabel(GL_FRAMEBUFFER, handle, static_cast<GLsizei>(name.size()), name.data());
1057     }
1058     framebuffer.handle = handle;
1059 }
1060
1061 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 72b284fab..04193e31e 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -4,157 +4,247 @@
  4
  5 #pragma once
  6
  7 #include <array>
  8 #include <functional>
  9 #include <memory>
 10 #include <unordered_map>
 11 #include <utility>
 12 #include <vector>
 13
 14 #include <glad/glad.h>
 15
 16 #include "common/common_types.h"
 17 #include "video_core/engines/shader_bytecode.h"
 18 #include "video_core/renderer_opengl/gl_device.h"
 19 #include "video_core/renderer_opengl/gl_resource_manager.h"
 20 #include "video_core/texture_cache/texture_cache.h"
 21
 22 namespace OpenGL {
 23
 24 using VideoCommon::SurfaceParams;
 25 using VideoCommon::ViewParams;
 26
 27 class CachedSurfaceView;
 28 class CachedSurface;
 29 class TextureCacheOpenGL;
 30 class StateTracker;
 31
 32 using Surface = std::shared_ptr<CachedSurface>;
 33 using View = std::shared_ptr<CachedSurfaceView>;
 34 using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
 35
 36 class CachedSurface final : public VideoCommon::SurfaceBase<View> {
 37     friend CachedSurfaceView;
 38
  4
  5 #pragma once
  6
  7 #include <memory>
  8 #include <span>
  9
 10 #include <glad/glad.h>
 11
 12 #include "video_core/renderer_opengl/gl_resource_manager.h"
 13 #include "video_core/renderer_opengl/util_shaders.h"
 14 #include "video_core/texture_cache/texture_cache.h"
 15
 16 namespace OpenGL {
 17
 18 class Device;
 19 class ProgramManager;
 20 class StateTracker;
 21
 22 class Framebuffer;
 23 class Image;
 24 class ImageView;
 25 class Sampler;
 26
 27 using VideoCommon::ImageId;
 28 using VideoCommon::ImageViewId;
 29 using VideoCommon::ImageViewType;
 30 using VideoCommon::NUM_RT;
 31 using VideoCommon::Offset2D;
 32 using VideoCommon::RenderTargets;
 33
 39 public:
 40     explicit CachedSurface(GPUVAddr gpu_addr_, const SurfaceParams& params_,
 41                            bool is_astc_supported_);
 42     ~CachedSurface();
 43
 44     void UploadTexture(const std::vector<u8>& staging_buffer) override;
 45     void DownloadTexture(std::vector<u8>& staging_buffer) override;
 46
 47     GLenum GetTarget() const {
 48         return target;
 49     }
 50
 51     GLuint GetTexture() const {
 52         return texture.handle;
 53     }
 54
 55     bool IsCompressed() const {
 56         return is_compressed;
 34 class ImageBufferMap {
 35 public:
 36     explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync);
 37     ~ImageBufferMap();
 38
 39     GLuint Handle() const noexcept {
 40         return handle;
 41     }
 42
 43     std::span<u8> Span() const noexcept {
 44         return span;
 45     }
 46
 47 private:
 48     std::span<u8> span;
 49     OGLSync* sync;
 50     GLuint handle;
 51 };
52
53struct FormatProperties {
54 GLenum compatibility_class;
55 bool compatibility_by_size;
56 bool is_compressed;
57};
58
59class TextureCacheRuntime {
60 friend Framebuffer;
61 friend Image;
62 friend ImageView;
63 friend Sampler;
64
65public:
66 explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager,
67 StateTracker& state_tracker);
68 ~TextureCacheRuntime();
69
70 void Finish();
71
72 ImageBufferMap MapUploadBuffer(size_t size);
73
74 ImageBufferMap MapDownloadBuffer(size_t size);
75
76 void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
77
78 void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
79 UNIMPLEMENTED();
 57     }
 58
 59 protected:
 60     void DecorateSurfaceName() override;
 80     }
 81
 82     bool CanImageBeCopied(const Image& dst, const Image& src);
 83
84 void EmulateCopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
85
86 void BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
87 const std::array<Offset2D, 2>& dst_region,
88 const std::array<Offset2D, 2>& src_region,
89 Tegra::Engines::Fermi2D::Filter filter,
90 Tegra::Engines::Fermi2D::Operation operation);
 61
 62     View CreateView(const ViewParams& view_key) override;
 63     View CreateViewInner(const ViewParams& view_key, bool is_proxy);
 91
 92     void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
 93                                std::span<const VideoCommon::SwizzleParameters> swizzles);
94
95 void InsertUploadMemoryBarrier();
96
97 FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const;
 64
 65 private:
 66     void UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer);
 67
 68     GLenum internal_format{};
 69     GLenum format{};
 70     GLenum type{};
 71     bool is_compressed{};
 72     GLenum target{};
 73     u32 view_count{};
 74
 75     OGLTexture texture;
 76     OGLBuffer texture_buffer;
 98
 99 private:
100     struct StagingBuffers {
101         explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
102         ~StagingBuffers();
103
104         ImageBufferMap RequestMap(size_t requested_size, bool insert_fence);
105
106         size_t RequestBuffer(size_t requested_size);
107
108 std::optional<size_t> FindBuffer(size_t requested_size);
109
110 std::vector<OGLSync> syncs;
111 std::vector<OGLBuffer> buffers;
112 std::vector<u8*> maps;
113 std::vector<size_t> sizes;
114 GLenum storage_flags;
115 GLenum map_flags;
116 };
117
118 const Device& device;
119 StateTracker& state_tracker;
120 UtilShaders util_shaders;
121
122 std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
123
124 StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
125 StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT};
126
127 OGLTexture null_image_1d_array;
128 OGLTexture null_image_cube_array;
129 OGLTexture null_image_3d;
130 OGLTexture null_image_rect;
131 OGLTextureView null_image_view_1d;
132 OGLTextureView null_image_view_2d;
133 OGLTextureView null_image_view_2d_array;
134 OGLTextureView null_image_view_cube;
135
136 std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views;
 77 };
 78
 79 class CachedSurfaceView final : public VideoCommon::ViewBase {
 80 public:
 81     explicit CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_, bool is_proxy_);
 82     ~CachedSurfaceView();
 83
 84     /// @brief Attaches this texture view to the currently bound fb_target framebuffer
 85     /// @param attachment Attachment to bind textures to
 86     /// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER)
 87     void Attach(GLenum attachment, GLenum fb_target) const;
 88
 89     GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
 90                       Tegra::Texture::SwizzleSource y_source,
 91                       Tegra::Texture::SwizzleSource z_source,
 92                       Tegra::Texture::SwizzleSource w_source);
 93
 94     void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix);
 95
 96     void MarkAsModified(u64 tick) {
 97         surface.MarkAsModified(true, tick);
 98     }
 99
100     GLuint GetTexture() const {
101         if (is_proxy) {
102             return surface.GetTexture();
103         }
104         return main_view.handle;
137 };
138
139 class Image : public VideoCommon::ImageBase {
140     friend ImageView;
141
142 public:
143     explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
144                    VAddr cpu_addr);
145
146     void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
147                       std::span<const VideoCommon::BufferImageCopy> copies);
148
149     void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
150                       std::span<const VideoCommon::BufferCopy> copies);
151
152     void DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
153                         std::span<const VideoCommon::BufferImageCopy> copies);
154
155     GLuint Handle() const noexcept {
156         return texture.handle;
157     }
158
159 private:
160     void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
161
162     void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
163
164 OGLTexture texture;
165 OGLTextureView store_view;
166 OGLBuffer buffer;
167 GLenum gl_internal_format = GL_NONE;
168 GLenum gl_store_format = GL_NONE;
169 GLenum gl_format = GL_NONE;
170 GLenum gl_type = GL_NONE;
171};
172
173class ImageView : public VideoCommon::ImageViewBase {
174 friend Image;
175
176public:
177 explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
178 explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
179
180 [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept {
181 return views[static_cast<size_t>(query_type)];
105     }
106
107     GLenum GetFormat() const {
108         return format;
109     }
110
111     const SurfaceParams& GetSurfaceParams() const {
112         return surface.GetSurfaceParams();
113     }
114
115 private:
116     OGLTextureView CreateTextureView() const;
182     }
183
184     [[nodiscard]] GLuint DefaultHandle() const noexcept {
185         return default_handle;
186     }
187
188     [[nodiscard]] GLenum Format() const noexcept {
189         return internal_format;
190     }
191
192 private:
193     void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle,
194 const VideoCommon::ImageViewInfo& info,
195 VideoCommon::SubresourceRange view_range);
196
197 std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{};
198 std::vector<OGLTextureView> stored_views;
199 GLuint default_handle = 0;
200 GLenum internal_format = GL_NONE;
201};
202
203class ImageAlloc : public VideoCommon::ImageAllocBase {};
117
118     CachedSurface& surface;
119     const GLenum format;
120     const GLenum target;
121     const bool is_proxy;
122
123     std::unordered_map<u32, OGLTextureView> view_cache;
124     OGLTextureView main_view;
125
126     // Use an invalid default so it always fails the comparison test
127     u32 current_swizzle = 0xffffffff;
128     GLuint current_view = 0;
129 };
204
205 class Sampler {
206 public:
207     explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
208
209     GLuint Handle() const noexcept {
210         return sampler.handle;
211     }
212
213 private:
214     OGLSampler sampler;
215 };
130
131 class TextureCacheOpenGL final : public TextureCacheBase {
132 public:
133     explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_,
134                                 Tegra::Engines::Maxwell3D& maxwell3d_,
135                                 Tegra::MemoryManager& gpu_memory_, const Device& device_,
136                                 StateTracker& state_tracker);
137     ~TextureCacheOpenGL();
138
139 protected:
140     Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override;
141
142     void ImageCopy(Surface& src_surface, Surface& dst_surface,
143                    const VideoCommon::CopyParams& copy_params) override;
144
145     void ImageBlit(View& src_view, View& dst_view,
146                    const Tegra::Engines::Fermi2D::Config& copy_config) override;
147
148     void BufferCopy(Surface& src_surface, Surface& dst_surface) override;
149
150 private:
151     GLuint FetchPBO(std::size_t buffer_size);
152
153     StateTracker& state_tracker;
154
155     OGLFramebuffer src_framebuffer;
156     OGLFramebuffer dst_framebuffer;
157     std::unordered_map<u32, OGLBuffer> copy_pbo_cache;
158 };
159
160 } // namespace OpenGL
216
217 class Framebuffer {
218 public:
219     explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
220                          ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
221
222     [[nodiscard]] GLuint Handle() const noexcept {
223         return framebuffer.handle;
224     }
225
226     [[nodiscard]] GLbitfield BufferBits() const noexcept {
227         return buffer_bits;
228     }
229
230 private:
231     OGLFramebuffer framebuffer;
232     GLbitfield buffer_bits = GL_NONE;
233 };
234
235 struct TextureCacheParams {
236     static constexpr bool ENABLE_VALIDATION = true;
237     static constexpr bool FRAMEBUFFER_BLITS = true;
238     static constexpr bool HAS_EMULATED_COPIES = true;
239
240     using Runtime = OpenGL::TextureCacheRuntime;
241     using Image = OpenGL::Image;
242     using ImageAlloc = OpenGL::ImageAlloc;
243     using ImageView = OpenGL::ImageView;
244     using Sampler = OpenGL::Sampler;
245     using Framebuffer = OpenGL::Framebuffer;
246 };
247
248 using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
249
250 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index dd4ee3361..cbccfdeb4 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -475,6 +475,19 @@ inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) {
475 475     return GL_FILL;
476 476 }
477 477
478inline GLenum ReductionFilter(Tegra::Texture::SamplerReduction filter) {
479 switch (filter) {
480 case Tegra::Texture::SamplerReduction::WeightedAverage:
481 return GL_WEIGHTED_AVERAGE_ARB;
482 case Tegra::Texture::SamplerReduction::Min:
483 return GL_MIN;
484 case Tegra::Texture::SamplerReduction::Max:
485 return GL_MAX;
486 }
487 UNREACHABLE_MSG("Invalid reduction filter={}", static_cast<int>(filter));
488 return GL_WEIGHTED_AVERAGE_ARB;
489}
490
478 491 inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) {
479 492     // Enumeration order matches register order. We can convert it arithmetically.
480 493     return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index cbfaaa99c..dd77a543c 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -23,10 +23,10 @@
 23  23 #include "core/telemetry_session.h"
 24  24 #include "video_core/host_shaders/opengl_present_frag.h"
 25  25 #include "video_core/host_shaders/opengl_present_vert.h"
 26     #include "video_core/morton.h"
 27  26 #include "video_core/renderer_opengl/gl_rasterizer.h"
 28  27 #include "video_core/renderer_opengl/gl_shader_manager.h"
 29  28 #include "video_core/renderer_opengl/renderer_opengl.h"
     29 #include "video_core/textures/decoders.h"
 30  30
 31  31 namespace OpenGL {
 32  32
@@ -140,11 +140,10 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
140 140     if (!framebuffer) {
141 141         return;
142 142     }
143
144 143     PrepareRendertarget(framebuffer);
145 144     RenderScreenshot();
146 145
147         glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
    146     state_tracker.BindFramebuffer(0);
148 147     DrawScreen(emu_window.GetFramebufferLayout());
149 148
150 149     ++m_current_frame;
@@ -187,19 +186,20 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
187 186     // Reset the screen info's display texture to its own permanent texture
188 187     screen_info.display_texture = screen_info.texture.resource.handle;
189 188
190         const auto pixel_format{
191             VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
192         const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
193         const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
194         u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
195         rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes);
196
197 189     // TODO(Rodrigo): Read this from HLE
198 190     constexpr u32 block_height_log2 = 4;
199         VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format,
200                                  framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1,
201                                  gl_framebuffer_data.data(), host_ptr);
202
    191     const auto pixel_format{
    192         VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
    193     const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
    194     const u64 size_in_bytes{Tegra::Texture::CalculateSize(
    195         true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)};
    196     const u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
    197     const std::span<const u8> input_data(host_ptr, size_in_bytes);
    198     Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel,
    199                                      framebuffer.width, framebuffer.height, 1, block_height_log2,
    200                                      0);
    201
    202     glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
203 203     glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
204 204
205 205     // Update existing texture
@@ -238,6 +238,10 @@ void RendererOpenGL::InitOpenGLObjects() {
238 238     glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
239 239     glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
240 240
    241     // Generate presentation sampler
    242     present_sampler.Create();
    243     glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    244
241 245     // Generate VBO handle for drawing
242 246     vertex_buffer.Create();
243 247
@@ -255,6 +259,11 @@ void RendererOpenGL::InitOpenGLObjects() {
255 259     // Clear screen to black
256 260     LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
257 261
    262     // Enable seamless cubemaps when per texture parameters are not available
    263     if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
    264         glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
    265     }
    266
258 267     // Enable unified vertex attributes and query vertex buffer address when the driver supports it
259 268     if (device.HasVertexBufferUnifiedMemory()) {
260 269         glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
@@ -296,7 +305,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
296 305
297 306     const auto pixel_format{
298 307         VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
299             const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
    308     const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
300 309     gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel);
301 310
302 311     GLint internal_format;
@@ -315,8 +324,8 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
315 324         internal_format = GL_RGBA8;
316 325         texture.gl_format = GL_RGBA;
317 326         texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
318             UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
319                               static_cast<u32>(framebuffer.pixel_format));
    327         // UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
    328         //                   static_cast<u32>(framebuffer.pixel_format));
320 329     }
321 330
322 331     texture.resource.Release();
@@ -382,7 +391,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
382 391     state_tracker.NotifyPolygonModes();
383 392     state_tracker.NotifyViewport0();
384 393     state_tracker.NotifyScissor0();
385         state_tracker.NotifyColorMask0();
    394     state_tracker.NotifyColorMask(0);
386 395     state_tracker.NotifyBlend0();
387 396     state_tracker.NotifyFramebuffer();
388 397     state_tracker.NotifyFrontFace();
@@ -440,7 +449,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
440 449     }
441 450
442 451     glBindTextureUnit(0, screen_info.display_texture);
443         glBindSampler(0, 0);
    452     glBindSampler(0, present_sampler.handle);
444 453
445 454     glClear(GL_COLOR_BUFFER_BIT);
446 455     glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
@@ -473,6 +482,8 @@ void RendererOpenGL::RenderScreenshot() {
473 482
474 483     DrawScreen(layout);
475 484
    485     glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
    486     glPixelStorei(GL_PACK_ROW_LENGTH, 0);
476 487     glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
477 488                  renderer_settings.screenshot_bits);
478 489
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 376f88766..44e109794 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -102,6 +102,7 @@ private:
102 102     StateTracker state_tracker{gpu};
103 103
104 104     // OpenGL object IDs
    105     OGLSampler present_sampler;
105 106     OGLBuffer vertex_buffer;
106 107     OGLProgram vertex_program;
107 108     OGLProgram fragment_program;
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
new file mode 100644
index 000000000..eb849cbf2
--- /dev/null
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -0,0 +1,224 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <bit>
6#include <span>
7#include <string_view>
8
9#include <glad/glad.h>
10
11#include "common/assert.h"
12#include "common/common_types.h"
13#include "common/div_ceil.h"
14#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
15#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
16#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
17#include "video_core/host_shaders/pitch_unswizzle_comp.h"
18#include "video_core/renderer_opengl/gl_resource_manager.h"
19#include "video_core/renderer_opengl/gl_shader_manager.h"
20#include "video_core/renderer_opengl/gl_texture_cache.h"
21#include "video_core/renderer_opengl/util_shaders.h"
22#include "video_core/surface.h"
23#include "video_core/texture_cache/accelerated_swizzle.h"
24#include "video_core/texture_cache/types.h"
25#include "video_core/texture_cache/util.h"
26#include "video_core/textures/decoders.h"
27
28namespace OpenGL {
29
30using namespace HostShaders;
31
32using VideoCommon::Extent3D;
33using VideoCommon::ImageCopy;
34using VideoCommon::ImageType;
35using VideoCommon::SwizzleParameters;
36using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams;
37using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams;
38using VideoCore::Surface::BytesPerBlock;
39
40namespace {
41
42OGLProgram MakeProgram(std::string_view source) {
43 OGLShader shader;
44 shader.Create(source, GL_COMPUTE_SHADER);
45
46 OGLProgram program;
47 program.Create(true, false, shader.handle);
48 return program;
49}
50
51} // Anonymous namespace
52
53UtilShaders::UtilShaders(ProgramManager& program_manager_)
54 : program_manager{program_manager_},
55 block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
56 block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
57 pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
58 copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
59 const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
60 swizzle_table_buffer.Create();
61 glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
62}
63
64UtilShaders::~UtilShaders() = default;
65
66void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
67 std::span<const SwizzleParameters> swizzles) {
68 static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
69 static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
70 static constexpr GLuint BINDING_INPUT_BUFFER = 1;
71 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
72
73 program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
74 glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
75 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
76
77 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
78 for (const SwizzleParameters& swizzle : swizzles) {
79 const Extent3D num_tiles = swizzle.num_tiles;
80 const size_t input_offset = swizzle.buffer_offset + buffer_offset;
81
82 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
83 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
84
85 const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
86 glUniform3uiv(0, 1, params.origin.data());
87 glUniform3iv(1, 1, params.destination.data());
88 glUniform1ui(2, params.bytes_per_block_log2);
89 glUniform1ui(3, params.layer_stride);
90 glUniform1ui(4, params.block_size);
91 glUniform1ui(5, params.x_shift);
92 glUniform1ui(6, params.block_height);
93 glUniform1ui(7, params.block_height_mask);
94 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
95 input_offset, image.guest_size_bytes - swizzle.buffer_offset);
96 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
97 GL_WRITE_ONLY, store_format);
98 glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
99 }
100 program_manager.RestoreGuestCompute();
101}
102
103void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
104 std::span<const SwizzleParameters> swizzles) {
105 static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
106
107 static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
108 static constexpr GLuint BINDING_INPUT_BUFFER = 1;
109 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
110
111 glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
112 program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
113 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
114
115 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
116 for (const SwizzleParameters& swizzle : swizzles) {
117 const Extent3D num_tiles = swizzle.num_tiles;
118 const size_t input_offset = swizzle.buffer_offset + buffer_offset;
119
120 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
121 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
122 const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth);
123
124 const auto params = MakeBlockLinearSwizzle3DParams(swizzle, image.info);
125 glUniform3uiv(0, 1, params.origin.data());
126 glUniform3iv(1, 1, params.destination.data());
127 glUniform1ui(2, params.bytes_per_block_log2);
128 glUniform1ui(3, params.slice_size);
129 glUniform1ui(4, params.block_size);
130 glUniform1ui(5, params.x_shift);
131 glUniform1ui(6, params.block_height);
132 glUniform1ui(7, params.block_height_mask);
133 glUniform1ui(8, params.block_depth);
134 glUniform1ui(9, params.block_depth_mask);
135 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
136 input_offset, image.guest_size_bytes - swizzle.buffer_offset);
137 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
138 GL_WRITE_ONLY, store_format);
139 glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
140 }
141 program_manager.RestoreGuestCompute();
142}
143
144void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
145 std::span<const SwizzleParameters> swizzles) {
146 static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
147 static constexpr GLuint BINDING_INPUT_BUFFER = 0;
148 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
149 static constexpr GLuint LOC_ORIGIN = 0;
150 static constexpr GLuint LOC_DESTINATION = 1;
151 static constexpr GLuint LOC_BYTES_PER_BLOCK = 2;
152 static constexpr GLuint LOC_PITCH = 3;
153
154 const u32 bytes_per_block = BytesPerBlock(image.info.format);
155 const GLenum format = StoreFormat(bytes_per_block);
156 const u32 pitch = image.info.pitch;
157
158 UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
159 "Non-power of two images are not implemented");
160
161 program_manager.BindHostCompute(pitch_unswizzle_program.handle);
162 glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
163 glUniform2ui(LOC_ORIGIN, 0, 0);
164 glUniform2i(LOC_DESTINATION, 0, 0);
165 glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
166 glUniform1ui(LOC_PITCH, pitch);
167 glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format);
168 for (const SwizzleParameters& swizzle : swizzles) {
169 const Extent3D num_tiles = swizzle.num_tiles;
170 const size_t input_offset = swizzle.buffer_offset + buffer_offset;
171
172 const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
173 const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
174
175 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
176 input_offset, image.guest_size_bytes - swizzle.buffer_offset);
177 glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
178 }
179 program_manager.RestoreGuestCompute();
180}
181
182void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const ImageCopy> copies) {
183 static constexpr GLuint BINDING_INPUT_IMAGE = 0;
184 static constexpr GLuint BINDING_OUTPUT_IMAGE = 1;
185 static constexpr GLuint LOC_SRC_OFFSET = 0;
186 static constexpr GLuint LOC_DST_OFFSET = 1;
187
188 program_manager.BindHostCompute(copy_bc4_program.handle);
189
190 for (const ImageCopy& copy : copies) {
191 ASSERT(copy.src_subresource.base_layer == 0);
192 ASSERT(copy.src_subresource.num_layers == 1);
193 ASSERT(copy.dst_subresource.base_layer == 0);
194 ASSERT(copy.dst_subresource.num_layers == 1);
195
196 glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z);
197 glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z);
198 glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), copy.src_subresource.base_level,
199 GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI);
200 glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(),
201 copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
202 glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
203 }
204 program_manager.RestoreGuestCompute();
205}
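// Note: BC4 encodes each 4x4 texel tile in a single 64-bit block, so viewing
// the source as GL_RG32UI makes one image texel correspond to exactly one
// compressed block.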
206
207GLenum StoreFormat(u32 bytes_per_block) {
208 switch (bytes_per_block) {
209 case 1:
210 return GL_R8UI;
211 case 2:
212 return GL_R16UI;
213 case 4:
214 return GL_R32UI;
215 case 8:
216 return GL_RG32UI;
217 case 16:
218 return GL_RGBA32UI;
219 }
220 UNREACHABLE();
221 return GL_R8UI;
222}
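// StoreFormat deliberately ignores the real pixel format: the swizzle shaders
// move texels as opaque bit patterns, so images are bound with a uint format
// chosen purely by texel size. A usage sketch mirroring PitchUpload above:
//
//     const u32 bytes_per_block = BytesPerBlock(image.info.format); // e.g. 8
//     const GLenum format = StoreFormat(bytes_per_block);           // GL_RG32UI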
223
224} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h
new file mode 100644
index 000000000..359997255
--- /dev/null
+++ b/src/video_core/renderer_opengl/util_shaders.h
@@ -0,0 +1,51 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <span>
8
9#include <glad/glad.h>
10
11#include "common/common_types.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/texture_cache/types.h"
14
15namespace OpenGL {
16
17class Image;
18class ImageBufferMap;
19class ProgramManager;
20
21class UtilShaders {
22public:
23 explicit UtilShaders(ProgramManager& program_manager);
24 ~UtilShaders();
25
26 void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
27 std::span<const VideoCommon::SwizzleParameters> swizzles);
28
29 void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
30 std::span<const VideoCommon::SwizzleParameters> swizzles);
31
32 void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
33 std::span<const VideoCommon::SwizzleParameters> swizzles);
34
35 void CopyBC4(Image& dst_image, Image& src_image,
36 std::span<const VideoCommon::ImageCopy> copies);
37
38private:
39 ProgramManager& program_manager;
40
41 OGLBuffer swizzle_table_buffer;
42
43 OGLProgram block_linear_unswizzle_2d_program;
44 OGLProgram block_linear_unswizzle_3d_program;
45 OGLProgram pitch_unswizzle_program;
46 OGLProgram copy_bc4_program;
47};
48
49GLenum StoreFormat(u32 bytes_per_block);
50
51} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
deleted file mode 100644
index 6d7bb16b2..000000000
--- a/src/video_core/renderer_opengl/utils.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string>
6#include <vector>
7
8#include <fmt/format.h>
9#include <glad/glad.h>
10
11#include "common/common_types.h"
12#include "video_core/renderer_opengl/gl_state_tracker.h"
13#include "video_core/renderer_opengl/utils.h"
14
15namespace OpenGL {
16
17void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) {
18 if (!GLAD_GL_KHR_debug) {
19 // We don't need to throw an error as this is just for debugging
20 return;
21 }
22
23 std::string object_label;
24 if (extra_info.empty()) {
25 switch (identifier) {
26 case GL_TEXTURE:
27 object_label = fmt::format("Texture@0x{:016X}", addr);
28 break;
29 case GL_PROGRAM:
30 object_label = fmt::format("Shader@0x{:016X}", addr);
31 break;
32 default:
33 object_label = fmt::format("Object(0x{:X})@0x{:016X}", identifier, addr);
34 break;
35 }
36 } else {
37 object_label = fmt::format("{}@0x{:016X}", extra_info, addr);
38 }
39 glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str()));
40}
41
42} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
deleted file mode 100644
index 9c09ee12c..000000000
--- a/src/video_core/renderer_opengl/utils.h
+++ /dev/null
@@ -1,16 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string_view>
8#include <vector>
9#include <glad/glad.h>
10#include "common/common_types.h"
11
12namespace OpenGL {
13
14void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {});
15
16} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
new file mode 100644
index 000000000..87c8e5693
--- /dev/null
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -0,0 +1,624 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6
7#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
8#include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"
9#include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
10#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h"
11#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
12#include "video_core/renderer_vulkan/blit_image.h"
13#include "video_core/renderer_vulkan/maxwell_to_vk.h"
14#include "video_core/renderer_vulkan/vk_device.h"
15#include "video_core/renderer_vulkan/vk_scheduler.h"
16#include "video_core/renderer_vulkan/vk_shader_util.h"
17#include "video_core/renderer_vulkan/vk_state_tracker.h"
18#include "video_core/renderer_vulkan/vk_texture_cache.h"
19#include "video_core/renderer_vulkan/vk_update_descriptor.h"
20#include "video_core/renderer_vulkan/wrapper.h"
21#include "video_core/surface.h"
22
23namespace Vulkan {
24
25using VideoCommon::ImageViewType;
26
27namespace {
28struct PushConstants {
29 std::array<float, 2> tex_scale;
30 std::array<float, 2> tex_offset;
31};
32
33template <u32 binding>
34inline constexpr VkDescriptorSetLayoutBinding TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING{
35 .binding = binding,
36 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
37 .descriptorCount = 1,
38 .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
39 .pImmutableSamplers = nullptr,
40};
41constexpr std::array TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS{
42 TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>,
43 TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<1>,
44};
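// Making the binding index a template parameter lets one constexpr initializer
// stamp out distinct VkDescriptorSetLayoutBinding values at compile time, one
// per slot, instead of filling the structures at runtime.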
45constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{
46 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
47 .pNext = nullptr,
48 .flags = 0,
49 .bindingCount = 1,
50 .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>,
51};
52constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{
53 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
54 .pNext = nullptr,
55 .flags = 0,
56 .bindingCount = static_cast<u32>(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.size()),
57 .pBindings = TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.data(),
58};
59constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{
60 .stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
61 .offset = 0,
62 .size = sizeof(PushConstants),
63};
64constexpr VkPipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{
65 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
66 .pNext = nullptr,
67 .flags = 0,
68 .vertexBindingDescriptionCount = 0,
69 .pVertexBindingDescriptions = nullptr,
70 .vertexAttributeDescriptionCount = 0,
71 .pVertexAttributeDescriptions = nullptr,
72};
73constexpr VkPipelineInputAssemblyStateCreateInfo PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO{
74 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
75 .pNext = nullptr,
76 .flags = 0,
77 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
78 .primitiveRestartEnable = VK_FALSE,
79};
80constexpr VkPipelineViewportStateCreateInfo PIPELINE_VIEWPORT_STATE_CREATE_INFO{
81 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
82 .pNext = nullptr,
83 .flags = 0,
84 .viewportCount = 1,
85 .pViewports = nullptr,
86 .scissorCount = 1,
87 .pScissors = nullptr,
88};
89constexpr VkPipelineRasterizationStateCreateInfo PIPELINE_RASTERIZATION_STATE_CREATE_INFO{
90 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
91 .pNext = nullptr,
92 .flags = 0,
93 .depthClampEnable = VK_FALSE,
94 .rasterizerDiscardEnable = VK_FALSE,
95 .polygonMode = VK_POLYGON_MODE_FILL,
96 .cullMode = VK_CULL_MODE_BACK_BIT,
97 .frontFace = VK_FRONT_FACE_CLOCKWISE,
98 .depthBiasEnable = VK_FALSE,
99 .depthBiasConstantFactor = 0.0f,
100 .depthBiasClamp = 0.0f,
101 .depthBiasSlopeFactor = 0.0f,
102 .lineWidth = 1.0f,
103};
104constexpr VkPipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE_INFO{
105 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
106 .pNext = nullptr,
107 .flags = 0,
108 .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
109 .sampleShadingEnable = VK_FALSE,
110 .minSampleShading = 0.0f,
111 .pSampleMask = nullptr,
112 .alphaToCoverageEnable = VK_FALSE,
113 .alphaToOneEnable = VK_FALSE,
114};
115constexpr std::array DYNAMIC_STATES{
116 VK_DYNAMIC_STATE_VIEWPORT,
117 VK_DYNAMIC_STATE_SCISSOR,
118};
119constexpr VkPipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{
120 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
121 .pNext = nullptr,
122 .flags = 0,
123 .dynamicStateCount = static_cast<u32>(DYNAMIC_STATES.size()),
124 .pDynamicStates = DYNAMIC_STATES.data(),
125};
126constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO{
127 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
128 .pNext = nullptr,
129 .flags = 0,
130 .logicOpEnable = VK_FALSE,
131 .logicOp = VK_LOGIC_OP_CLEAR,
132 .attachmentCount = 0,
133 .pAttachments = nullptr,
134 .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
135};
136constexpr VkPipelineColorBlendAttachmentState PIPELINE_COLOR_BLEND_ATTACHMENT_STATE{
137 .blendEnable = VK_FALSE,
138 .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
139 .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
140 .colorBlendOp = VK_BLEND_OP_ADD,
141 .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
142 .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
143 .alphaBlendOp = VK_BLEND_OP_ADD,
144 .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
145 VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
146};
147constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO{
148 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
149 .pNext = nullptr,
150 .flags = 0,
151 .logicOpEnable = VK_FALSE,
152 .logicOp = VK_LOGIC_OP_CLEAR,
153 .attachmentCount = 1,
154 .pAttachments = &PIPELINE_COLOR_BLEND_ATTACHMENT_STATE,
155 .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
156};
157constexpr VkPipelineDepthStencilStateCreateInfo PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO{
158 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
159 .pNext = nullptr,
160 .flags = 0,
161 .depthTestEnable = VK_TRUE,
162 .depthWriteEnable = VK_TRUE,
163 .depthCompareOp = VK_COMPARE_OP_ALWAYS,
164 .depthBoundsTestEnable = VK_FALSE,
165 .stencilTestEnable = VK_FALSE,
166 .front = VkStencilOpState{},
167 .back = VkStencilOpState{},
168 .minDepthBounds = 0.0f,
169 .maxDepthBounds = 0.0f,
170};
171
172template <VkFilter filter>
173inline constexpr VkSamplerCreateInfo SAMPLER_CREATE_INFO{
174 .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
175 .pNext = nullptr,
176 .flags = 0,
177 .magFilter = filter,
178 .minFilter = filter,
179 .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
180 .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
181 .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
182 .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
183 .mipLodBias = 0.0f,
184 .anisotropyEnable = VK_FALSE,
185 .maxAnisotropy = 0.0f,
186 .compareEnable = VK_FALSE,
187 .compareOp = VK_COMPARE_OP_NEVER,
188 .minLod = 0.0f,
189 .maxLod = 0.0f,
190 .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE,
191 .unnormalizedCoordinates = VK_TRUE,
192};
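// With unnormalizedCoordinates set to VK_TRUE these samplers take texel-space
// coordinates, which is why BindBlitState below can pass raw texel offsets and
// extents through the push constants; it also requires minLod == maxLod == 0,
// as unnormalized samplers cannot sample mipmaps.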
193
194constexpr VkPipelineLayoutCreateInfo PipelineLayoutCreateInfo(
195 const VkDescriptorSetLayout* set_layout) {
196 return VkPipelineLayoutCreateInfo{
197 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
198 .pNext = nullptr,
199 .flags = 0,
200 .setLayoutCount = 1,
201 .pSetLayouts = set_layout,
202 .pushConstantRangeCount = 1,
203 .pPushConstantRanges = &PUSH_CONSTANT_RANGE,
204 };
205}
206
207constexpr VkPipelineShaderStageCreateInfo PipelineShaderStageCreateInfo(VkShaderStageFlagBits stage,
208 VkShaderModule shader) {
209 return VkPipelineShaderStageCreateInfo{
210 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
211 .pNext = nullptr,
212 .flags = 0,
213 .stage = stage,
214 .module = shader,
215 .pName = "main",
216 .pSpecializationInfo = nullptr,
217 };
218}
219
220constexpr std::array<VkPipelineShaderStageCreateInfo, 2> MakeStages(
221 VkShaderModule vertex_shader, VkShaderModule fragment_shader) {
222 return std::array{
223 PipelineShaderStageCreateInfo(VK_SHADER_STAGE_VERTEX_BIT, vertex_shader),
224 PipelineShaderStageCreateInfo(VK_SHADER_STAGE_FRAGMENT_BIT, fragment_shader),
225 };
226}
227
228void UpdateOneTextureDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set,
229 VkSampler sampler, VkImageView image_view) {
230 const VkDescriptorImageInfo image_info{
231 .sampler = sampler,
232 .imageView = image_view,
233 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
234 };
235 const VkWriteDescriptorSet write_descriptor_set{
236 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
237 .pNext = nullptr,
238 .dstSet = descriptor_set,
239 .dstBinding = 0,
240 .dstArrayElement = 0,
241 .descriptorCount = 1,
242 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
243 .pImageInfo = &image_info,
244 .pBufferInfo = nullptr,
245 .pTexelBufferView = nullptr,
246 };
247 device.GetLogical().UpdateDescriptorSets(write_descriptor_set, nullptr);
248}
249
250void UpdateTwoTexturesDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set,
251 VkSampler sampler, VkImageView image_view_0,
252 VkImageView image_view_1) {
253 const VkDescriptorImageInfo image_info_0{
254 .sampler = sampler,
255 .imageView = image_view_0,
256 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
257 };
258 const VkDescriptorImageInfo image_info_1{
259 .sampler = sampler,
260 .imageView = image_view_1,
261 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
262 };
263 const std::array write_descriptor_sets{
264 VkWriteDescriptorSet{
265 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
266 .pNext = nullptr,
267 .dstSet = descriptor_set,
268 .dstBinding = 0,
269 .dstArrayElement = 0,
270 .descriptorCount = 1,
271 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
272 .pImageInfo = &image_info_0,
273 .pBufferInfo = nullptr,
274 .pTexelBufferView = nullptr,
275 },
276 VkWriteDescriptorSet{
277 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
278 .pNext = nullptr,
279 .dstSet = descriptor_set,
280 .dstBinding = 1,
281 .dstArrayElement = 0,
282 .descriptorCount = 1,
283 .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
284 .pImageInfo = &image_info_1,
285 .pBufferInfo = nullptr,
286 .pTexelBufferView = nullptr,
287 },
288 };
289 device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr);
290}
291
292void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout,
293 const std::array<Offset2D, 2>& dst_region,
294 const std::array<Offset2D, 2>& src_region) {
295 const VkOffset2D offset{
296 .x = std::min(dst_region[0].x, dst_region[1].x),
297 .y = std::min(dst_region[0].y, dst_region[1].y),
298 };
299 const VkExtent2D extent{
300 .width = static_cast<u32>(std::abs(dst_region[1].x - dst_region[0].x)),
301 .height = static_cast<u32>(std::abs(dst_region[1].y - dst_region[0].y)),
302 };
303 const VkViewport viewport{
304 .x = static_cast<float>(offset.x),
305 .y = static_cast<float>(offset.y),
306 .width = static_cast<float>(extent.width),
307 .height = static_cast<float>(extent.height),
308 .minDepth = 0.0f,
309 .maxDepth = 1.0f,
310 };
311 // TODO: Support scissored blits
312 const VkRect2D scissor{
313 .offset = offset,
314 .extent = extent,
315 };
316 const float scale_x = static_cast<float>(src_region[1].x - src_region[0].x);
317 const float scale_y = static_cast<float>(src_region[1].y - src_region[0].y);
318 const PushConstants push_constants{
319 .tex_scale = {scale_x, scale_y},
320 .tex_offset = {static_cast<float>(src_region[0].x), static_cast<float>(src_region[0].y)},
321 };
322 cmdbuf.SetViewport(0, viewport);
323 cmdbuf.SetScissor(0, scissor);
324 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
325}
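// Worked example, assuming the full-screen-triangle vertex shader maps the
// destination position linearly through tex_scale and tex_offset: blitting
// src (32,0)..(0,16) into dst (0,0)..(64,32) yields a 64x32 viewport at the
// origin with tex_scale = {-32, 16} and tex_offset = {32, 0}; the negative X
// scale mirrors the image horizontally.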
326
327} // Anonymous namespace
328
329BlitImageHelper::BlitImageHelper(const VKDevice& device_, VKScheduler& scheduler_,
330 StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool)
331 : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_},
332 one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout(
333 ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
334 two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout(
335 TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
336 one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout),
337 two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout),
338 one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout(
339 PipelineLayoutCreateInfo(one_texture_set_layout.address()))),
340 two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout(
341 PipelineLayoutCreateInfo(two_textures_set_layout.address()))),
342 full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)),
343 blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)),
344 convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
345 convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
346 linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
347 nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {
348 if (device.IsExtShaderStencilExportSupported()) {
349 blit_depth_stencil_frag = BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV);
350 }
351}
352
353BlitImageHelper::~BlitImageHelper() = default;
354
355void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
356 const std::array<Offset2D, 2>& dst_region,
357 const std::array<Offset2D, 2>& src_region,
358 Tegra::Engines::Fermi2D::Filter filter,
359 Tegra::Engines::Fermi2D::Operation operation) {
360 const bool is_linear = filter == Tegra::Engines::Fermi2D::Filter::Bilinear;
361 const BlitImagePipelineKey key{
362 .renderpass = dst_framebuffer->RenderPass(),
363 .operation = operation,
364 };
365 const VkPipelineLayout layout = *one_texture_pipeline_layout;
366 const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D);
367 const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler;
368 const VkPipeline pipeline = FindOrEmplacePipeline(key);
369 const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
370 scheduler.RequestRenderpass(dst_framebuffer);
371 scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set,
372 &device = device](vk::CommandBuffer cmdbuf) {
373 // TODO: Barriers
374 UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
375 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
376 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
377 nullptr);
378 BindBlitState(cmdbuf, layout, dst_region, src_region);
379 cmdbuf.Draw(3, 1, 0, 0);
380 });
381 scheduler.InvalidateState();
382}
383
384void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
385 VkImageView src_depth_view, VkImageView src_stencil_view,
386 const std::array<Offset2D, 2>& dst_region,
387 const std::array<Offset2D, 2>& src_region,
388 Tegra::Engines::Fermi2D::Filter filter,
389 Tegra::Engines::Fermi2D::Operation operation) {
390 ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point);
391 ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy);
392
393 const VkPipelineLayout layout = *two_textures_pipeline_layout;
394 const VkSampler sampler = *nearest_sampler;
395 const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass());
396 const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
397 scheduler.RequestRenderpass(dst_framebuffer);
398 scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view,
399 src_stencil_view, descriptor_set,
400 &device = device](vk::CommandBuffer cmdbuf) {
401 // TODO: Barriers
402 UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view,
403 src_stencil_view);
404 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
405 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
406 nullptr);
407 BindBlitState(cmdbuf, layout, dst_region, src_region);
408 cmdbuf.Draw(3, 1, 0, 0);
409 });
410 scheduler.InvalidateState();
411}
412
413void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer,
414 const ImageView& src_image_view) {
415 ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass());
416 Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view);
417}
418
419void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer,
420 const ImageView& src_image_view) {
422 ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass());
423 Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view);
424}
425
426void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer,
427 const ImageView& src_image_view) {
428 ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass());
429 Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view);
430}
431
432void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
433 const ImageView& src_image_view) {
434 ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass());
435 Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view);
436}
437
438void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
439 const ImageView& src_image_view) {
440 const VkPipelineLayout layout = *one_texture_pipeline_layout;
441 const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D);
442 const VkSampler sampler = *nearest_sampler;
443 const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
444 const VkExtent2D extent{
445 .width = src_image_view.size.width,
446 .height = src_image_view.size.height,
447 };
448 scheduler.RequestRenderpass(dst_framebuffer);
449 scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent,
450 &device = device](vk::CommandBuffer cmdbuf) {
451 const VkOffset2D offset{
452 .x = 0,
453 .y = 0,
454 };
455 const VkViewport viewport{
456 .x = 0.0f,
457 .y = 0.0f,
458 .width = static_cast<float>(extent.width),
459 .height = static_cast<float>(extent.height),
460 .minDepth = 0.0f,
461 .maxDepth = 0.0f,
462 };
463 const VkRect2D scissor{
464 .offset = offset,
465 .extent = extent,
466 };
467 const PushConstants push_constants{
468 .tex_scale = {viewport.width, viewport.height},
469 .tex_offset = {0.0f, 0.0f},
470 };
471 UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
472
473 // TODO: Barriers
474 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
475 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
476 nullptr);
477 cmdbuf.SetViewport(0, viewport);
478 cmdbuf.SetScissor(0, scissor);
479 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
480 cmdbuf.Draw(3, 1, 0, 0);
481 });
482 scheduler.InvalidateState();
483}
484
485VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& key) {
486 const auto it = std::ranges::find(blit_color_keys, key);
487 if (it != blit_color_keys.end()) {
488 return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)];
489 }
490 blit_color_keys.push_back(key);
491
492 const std::array stages = MakeStages(*full_screen_vert, *blit_color_to_color_frag);
493 const VkPipelineColorBlendAttachmentState blend_attachment{
494 .blendEnable = VK_FALSE,
495 .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
496 .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
497 .colorBlendOp = VK_BLEND_OP_ADD,
498 .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
499 .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
500 .alphaBlendOp = VK_BLEND_OP_ADD,
501 .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
502 VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
503 };
504 // TODO: programmable blending
505 const VkPipelineColorBlendStateCreateInfo color_blend_create_info{
506 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
507 .pNext = nullptr,
508 .flags = 0,
509 .logicOpEnable = VK_FALSE,
510 .logicOp = VK_LOGIC_OP_CLEAR,
511 .attachmentCount = 1,
512 .pAttachments = &blend_attachment,
513 .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
514 };
515 blit_color_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({
516 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
517 .pNext = nullptr,
518 .flags = 0,
519 .stageCount = static_cast<u32>(stages.size()),
520 .pStages = stages.data(),
521 .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
522 .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
523 .pTessellationState = nullptr,
524 .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
525 .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
526 .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
527 .pDepthStencilState = nullptr,
528 .pColorBlendState = &color_blend_create_info,
529 .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
530 .layout = *one_texture_pipeline_layout,
531 .renderPass = key.renderpass,
532 .subpass = 0,
533 .basePipelineHandle = VK_NULL_HANDLE,
534 .basePipelineIndex = 0,
535 }));
536 return *blit_color_pipelines.back();
537}
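// blit_color_keys and blit_color_pipelines are parallel vectors: the index of
// the matching key selects the pipeline. A linear std::ranges::find is enough
// here since the number of distinct (renderpass, operation) pairs is expected
// to stay small.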
538
539VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) {
540 if (blit_depth_stencil_pipeline) {
541 return *blit_depth_stencil_pipeline;
542 }
543 const std::array stages = MakeStages(*full_screen_vert, *blit_depth_stencil_frag);
544 blit_depth_stencil_pipeline = device.GetLogical().CreateGraphicsPipeline({
545 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
546 .pNext = nullptr,
547 .flags = 0,
548 .stageCount = static_cast<u32>(stages.size()),
549 .pStages = stages.data(),
550 .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
551 .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
552 .pTessellationState = nullptr,
553 .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
554 .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
555 .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
556 .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
557 .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
558 .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
559 .layout = *two_textures_pipeline_layout,
560 .renderPass = renderpass,
561 .subpass = 0,
562 .basePipelineHandle = VK_NULL_HANDLE,
563 .basePipelineIndex = 0,
564 });
565 return *blit_depth_stencil_pipeline;
566}
567
568void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
569 if (pipeline) {
570 return;
571 }
572 const std::array stages = MakeStages(*full_screen_vert, *convert_depth_to_float_frag);
573 pipeline = device.GetLogical().CreateGraphicsPipeline({
574 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
575 .pNext = nullptr,
576 .flags = 0,
577 .stageCount = static_cast<u32>(stages.size()),
578 .pStages = stages.data(),
579 .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
580 .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
581 .pTessellationState = nullptr,
582 .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
583 .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
584 .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
585 .pDepthStencilState = nullptr,
586 .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
587 .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
588 .layout = *one_texture_pipeline_layout,
589 .renderPass = renderpass,
590 .subpass = 0,
591 .basePipelineHandle = VK_NULL_HANDLE,
592 .basePipelineIndex = 0,
593 });
594}
595
596void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
597 if (pipeline) {
598 return;
599 }
600 const std::array stages = MakeStages(*full_screen_vert, *convert_float_to_depth_frag);
601 pipeline = device.GetLogical().CreateGraphicsPipeline({
602 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
603 .pNext = nullptr,
604 .flags = 0,
605 .stageCount = static_cast<u32>(stages.size()),
606 .pStages = stages.data(),
607 .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
608 .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
609 .pTessellationState = nullptr,
610 .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
611 .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
612 .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
613 .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
614 .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
615 .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
616 .layout = *one_texture_pipeline_layout,
617 .renderPass = renderpass,
618 .subpass = 0,
619 .basePipelineHandle = VK_NULL_HANDLE,
620 .basePipelineIndex = 0,
621 });
622}
623
624} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
new file mode 100644
index 000000000..2c2790bf9
--- /dev/null
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -0,0 +1,97 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <compare>
8
9#include "video_core/engines/fermi_2d.h"
10#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
11#include "video_core/renderer_vulkan/wrapper.h"
12#include "video_core/texture_cache/types.h"
13
14namespace Vulkan {
15
16using VideoCommon::Offset2D;
17
18class VKDevice;
19class VKScheduler;
20class StateTracker;
21
22class Framebuffer;
23class ImageView;
24
25struct BlitImagePipelineKey {
26 constexpr auto operator<=>(const BlitImagePipelineKey&) const noexcept = default;
27
28 VkRenderPass renderpass;
29 Tegra::Engines::Fermi2D::Operation operation;
30};
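// The defaulted operator<=> also implies a defaulted operator==, which is what
// std::ranges::find relies on when FindOrEmplacePipeline looks up a cached
// pipeline for a (renderpass, operation) pair.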
31
32class BlitImageHelper {
33public:
34 explicit BlitImageHelper(const VKDevice& device, VKScheduler& scheduler,
35 StateTracker& state_tracker, VKDescriptorPool& descriptor_pool);
36 ~BlitImageHelper();
37
38 void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
39 const std::array<Offset2D, 2>& dst_region,
40 const std::array<Offset2D, 2>& src_region,
41 Tegra::Engines::Fermi2D::Filter filter,
42 Tegra::Engines::Fermi2D::Operation operation);
43
44 void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view,
45 VkImageView src_stencil_view, const std::array<Offset2D, 2>& dst_region,
46 const std::array<Offset2D, 2>& src_region,
47 Tegra::Engines::Fermi2D::Filter filter,
48 Tegra::Engines::Fermi2D::Operation operation);
49
50 void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
51
52 void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
53
54 void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
55
56 void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
57
58private:
59 void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
60 const ImageView& src_image_view);
61
62 [[nodiscard]] VkPipeline FindOrEmplacePipeline(const BlitImagePipelineKey& key);
63
64 [[nodiscard]] VkPipeline BlitDepthStencilPipeline(VkRenderPass renderpass);
65
66 void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
67
68 void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
69
70 const VKDevice& device;
71 VKScheduler& scheduler;
72 StateTracker& state_tracker;
73
74 vk::DescriptorSetLayout one_texture_set_layout;
75 vk::DescriptorSetLayout two_textures_set_layout;
76 DescriptorAllocator one_texture_descriptor_allocator;
77 DescriptorAllocator two_textures_descriptor_allocator;
78 vk::PipelineLayout one_texture_pipeline_layout;
79 vk::PipelineLayout two_textures_pipeline_layout;
80 vk::ShaderModule full_screen_vert;
81 vk::ShaderModule blit_color_to_color_frag;
82 vk::ShaderModule blit_depth_stencil_frag;
83 vk::ShaderModule convert_depth_to_float_frag;
84 vk::ShaderModule convert_float_to_depth_frag;
85 vk::Sampler linear_sampler;
86 vk::Sampler nearest_sampler;
87
88 std::vector<BlitImagePipelineKey> blit_color_keys;
89 std::vector<vk::Pipeline> blit_color_pipelines;
90 vk::Pipeline blit_depth_stencil_pipeline;
91 vk::Pipeline convert_d32_to_r32_pipeline;
92 vk::Pipeline convert_r32_to_d32_pipeline;
93 vk::Pipeline convert_d16_to_r16_pipeline;
94 vk::Pipeline convert_r16_to_d16_pipeline;
95};
96
97} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 5ec43db11..67dd10500 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -60,6 +60,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta
     logic_op.Assign(PackLogicOp(regs.logic_op.operation));
     rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
     topology.Assign(regs.draw.topology);
+    msaa_mode.Assign(regs.multisample_mode);
 
     raw2 = 0;
     const auto test_func =
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index c26b77790..7e95e6fce 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -186,6 +186,7 @@ struct FixedPipelineState {
         BitField<19, 4, u32> logic_op;
         BitField<23, 1, u32> rasterize_enable;
         BitField<24, 4, Maxwell::PrimitiveTopology> topology;
+        BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode;
     };
     union {
         u32 raw2;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 58e117eb3..40501e7fa 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -122,7 +122,7 @@ struct FormatTuple {
     {VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage},     // A8B8G8R8_SINT
     {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage},     // A8B8G8R8_UINT
     {VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable},                // R5G6B5_UNORM
-    {VK_FORMAT_B5G6R5_UNORM_PACK16, Attachable},                // B5G6R5_UNORM
+    {VK_FORMAT_B5G6R5_UNORM_PACK16},                            // B5G6R5_UNORM
     {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable},              // A1R5G5B5_UNORM
     {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM
     {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage},  // A2B10G10R10_UINT
@@ -163,7 +163,7 @@ struct FormatTuple {
     {VK_FORMAT_R16G16_UNORM, Attachable | Storage},  // R16G16_UNORM
     {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT
     {VK_FORMAT_UNDEFINED},                           // R16G16_UINT
-    {VK_FORMAT_UNDEFINED},                           // R16G16_SINT
+    {VK_FORMAT_R16G16_SINT, Attachable | Storage},   // R16G16_SINT
     {VK_FORMAT_R16G16_SNORM, Attachable | Storage},  // R16G16_SNORM
     {VK_FORMAT_UNDEFINED},                           // R32G32B32_FLOAT
     {VK_FORMAT_R8G8B8A8_SRGB, Attachable},           // A8B8G8R8_SRGB
@@ -233,18 +233,20 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo
 
     // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively
     if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
-        tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format)
-                           ? VK_FORMAT_A8B8G8R8_SRGB_PACK32
-                           : VK_FORMAT_A8B8G8R8_UNORM_PACK32;
+        const bool is_srgb = VideoCore::Surface::IsPixelFormatSRGB(pixel_format);
+        tuple.format = is_srgb ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 : VK_FORMAT_A8B8G8R8_UNORM_PACK32;
     }
     const bool attachable = tuple.usage & Attachable;
     const bool storage = tuple.usage & Storage;
 
     VkFormatFeatureFlags usage;
-    if (format_type == FormatType::Buffer) {
+    switch (format_type) {
+    case FormatType::Buffer:
         usage =
             VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
-    } else {
+        break;
+    case FormatType::Linear:
+    case FormatType::Optimal:
         usage = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
                 VK_FORMAT_FEATURE_TRANSFER_SRC_BIT;
         if (attachable) {
@@ -254,6 +256,7 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo
         if (storage) {
             usage |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
         }
+        break;
     }
     return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage};
 }
@@ -724,4 +727,17 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle)
     return {};
 }
 
+VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction) {
+    switch (reduction) {
+    case Tegra::Texture::SamplerReduction::WeightedAverage:
+        return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
+    case Tegra::Texture::SamplerReduction::Min:
+        return VK_SAMPLER_REDUCTION_MODE_MIN_EXT;
+    case Tegra::Texture::SamplerReduction::Max:
+        return VK_SAMPLER_REDUCTION_MODE_MAX_EXT;
+    }
+    UNREACHABLE_MSG("Invalid sampler mode={}", static_cast<int>(reduction));
+    return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
+}
+
 } // namespace Vulkan::MaxwellToVK
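(The VK_SAMPLER_REDUCTION_MODE_*_EXT values added above come from the
VK_EXT_sampler_filter_minmax extension; mapping Tegra's min/max sampler
reduction onto them assumes the device exposes that extension.)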
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 7e213452f..1a90f192e 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -61,4 +61,6 @@ VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
 
 VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle);
 
+VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction);
+
 } // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index ea4b7c1e6..7f521cb9b 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -92,9 +92,9 @@ Common::DynamicLibrary OpenVulkanLibrary() {
     return library;
 }
 
-std::pair<vk::Instance, u32> CreateInstance(
-    Common::DynamicLibrary& library, vk::InstanceDispatch& dld,
-    WindowSystemType window_type = WindowSystemType::Headless, bool enable_layers = false) {
+std::pair<vk::Instance, u32> CreateInstance(Common::DynamicLibrary& library,
+                                            vk::InstanceDispatch& dld, WindowSystemType window_type,
+                                            bool enable_debug_utils, bool enable_layers) {
     if (!library.IsOpen()) {
         LOG_ERROR(Render_Vulkan, "Vulkan library not available");
         return {};
@@ -133,7 +133,7 @@ std::pair<vk::Instance, u32> CreateInstance(
     if (window_type != Core::Frontend::WindowSystemType::Headless) {
         extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
     }
-    if (enable_layers) {
+    if (enable_debug_utils) {
         extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
     }
     extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
@@ -287,7 +287,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
 bool RendererVulkan::Init() {
     library = OpenVulkanLibrary();
     std::tie(instance, instance_version) = CreateInstance(
-        library, dld, render_window.GetWindowInfo().type, Settings::values.renderer_debug);
+        library, dld, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug);
     if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) {
         return false;
     }
@@ -447,7 +447,8 @@ void RendererVulkan::Report() const {
 std::vector<std::string> RendererVulkan::EnumerateDevices() {
     vk::InstanceDispatch dld;
     Common::DynamicLibrary library = OpenVulkanLibrary();
-    vk::Instance instance = CreateInstance(library, dld).first;
+    vk::Instance instance =
+        CreateInstance(library, dld, WindowSystemType::Headless, false, false).first;
     if (!instance) {
         return {};
     }
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 977b86003..74642fba4 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -33,10 +33,9 @@ class VKDevice;
 class VKMemoryManager;
 class VKSwapchain;
 class VKScheduler;
-class VKImage;
 
 struct VKScreenInfo {
-    VKImage* image{};
+    VkImageView image_view{};
     u32 width{};
     u32 height{};
     bool is_srgb{};
diff --git a/src/video_core/renderer_vulkan/shaders/blit.frag b/src/video_core/renderer_vulkan/shaders/blit.frag
deleted file mode 100644
index a06ecd24a..000000000
--- a/src/video_core/renderer_vulkan/shaders/blit.frag
+++ /dev/null
@@ -1,24 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5/*
6 * Build instructions:
7 * $ glslangValidator -V $THIS_FILE -o output.spv
8 * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
9 * $ xxd -i optimized.spv
10 *
11 * Then copy that bytecode to the C++ file
12 */
13
14#version 460 core
15
16layout (location = 0) in vec2 frag_tex_coord;
17
18layout (location = 0) out vec4 color;
19
20layout (binding = 1) uniform sampler2D color_texture;
21
22void main() {
23 color = texture(color_texture, frag_tex_coord);
24}
diff --git a/src/video_core/renderer_vulkan/shaders/blit.vert b/src/video_core/renderer_vulkan/shaders/blit.vert
deleted file mode 100644
index c64d9235a..000000000
--- a/src/video_core/renderer_vulkan/shaders/blit.vert
+++ /dev/null
@@ -1,28 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5/*
6 * Build instructions:
7 * $ glslangValidator -V $THIS_FILE -o output.spv
8 * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
9 * $ xxd -i optimized.spv
10 *
11 * Then copy that bytecode to the C++ file
12 */
13
14#version 460 core
15
16layout (location = 0) in vec2 vert_position;
17layout (location = 1) in vec2 vert_tex_coord;
18
19layout (location = 0) out vec2 frag_tex_coord;
20
21layout (set = 0, binding = 0) uniform MatrixBlock {
22 mat4 modelview_matrix;
23};
24
25void main() {
26 gl_Position = modelview_matrix * vec4(vert_position, 0.0, 1.0);
27 frag_tex_coord = vert_tex_coord;
28}
diff --git a/src/video_core/renderer_vulkan/shaders/quad_array.comp b/src/video_core/renderer_vulkan/shaders/quad_array.comp
deleted file mode 100644
index 5a5703308..000000000
--- a/src/video_core/renderer_vulkan/shaders/quad_array.comp
+++ /dev/null
@@ -1,37 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5/*
6 * Build instructions:
7 * $ glslangValidator -V $THIS_FILE -o output.spv
8 * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
9 * $ xxd -i optimized.spv
10 *
11 * Then copy that bytecode to the C++ file
12 */
13
14#version 460 core
15
16layout (local_size_x = 1024) in;
17
18layout (std430, set = 0, binding = 0) buffer OutputBuffer {
19 uint output_indexes[];
20};
21
22layout (push_constant) uniform PushConstants {
23 uint first;
24};
25
26void main() {
27 uint primitive = gl_GlobalInvocationID.x;
28 if (primitive * 6 >= output_indexes.length()) {
29 return;
30 }
31
32 const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3);
33 for (uint vertex = 0; vertex < 6; ++vertex) {
34 uint index = first + primitive * 4 + quad_map[vertex];
35 output_indexes[primitive * 6 + vertex] = index;
36 }
37}
diff --git a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp b/src/video_core/renderer_vulkan/shaders/quad_indexed.comp
deleted file mode 100644
index 5a472ba9b..000000000
--- a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp
+++ /dev/null
@@ -1,50 +0,0 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5/*
6 * Build instructions:
7 * $ glslangValidator -V quad_indexed.comp -o output.spv
8 * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
9 * $ xxd -i optimized.spv
10 *
11 * Then copy that bytecode to the C++ file
12 */
13
14#version 460 core
15
16layout (local_size_x = 1024) in;
17
18layout (std430, set = 0, binding = 0) readonly buffer InputBuffer {
19 uint input_indexes[];
20};
21
22layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
23 uint output_indexes[];
24};
25
26layout (push_constant) uniform PushConstants {
27 uint base_vertex;
28 int index_shift; // 0: uint8, 1: uint16, 2: uint32
29};
30
31void main() {
32 int primitive = int(gl_GlobalInvocationID.x);
33 if (primitive * 6 >= output_indexes.length()) {
34 return;
35 }
36
37 int index_size = 8 << index_shift;
38 int flipped_shift = 2 - index_shift;
39 int mask = (1 << flipped_shift) - 1;
40
41 const int quad_swizzle[6] = int[](0, 1, 2, 0, 2, 3);
42 for (uint vertex = 0; vertex < 6; ++vertex) {
43 int offset = primitive * 4 + quad_swizzle[vertex];
44 int int_offset = offset >> flipped_shift;
45 int bit_offset = (offset & mask) * index_size;
46 uint packed_input = input_indexes[int_offset];
47 uint index = bitfieldExtract(packed_input, bit_offset, index_size);
48 output_indexes[primitive * 6 + vertex] = index + base_vertex;
49 }
50}
diff --git a/src/video_core/renderer_vulkan/shaders/uint8.comp b/src/video_core/renderer_vulkan/shaders/uint8.comp
deleted file mode 100644
index a320f3ae0..000000000
--- a/src/video_core/renderer_vulkan/shaders/uint8.comp
+++ /dev/null
@@ -1,33 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5/*
6 * Build instructions:
7 * $ glslangValidator -V $THIS_FILE -o output.spv
8 * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
9 * $ xxd -i optimized.spv
10 *
11 * Then copy that bytecode to the C++ file
12 */
13
14#version 460 core
15#extension GL_EXT_shader_16bit_storage : require
16#extension GL_EXT_shader_8bit_storage : require
17
18layout (local_size_x = 1024) in;
19
20layout (std430, set = 0, binding = 0) readonly buffer InputBuffer {
21 uint8_t input_indexes[];
22};
23
24layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
25 uint16_t output_indexes[];
26};
27
28void main() {
29 uint id = gl_GlobalInvocationID.x;
30 if (id < input_indexes.length()) {
31 output_indexes[id] = uint16_t(input_indexes[id]);
32 }
33}
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index b5b60309e..d3a83f22f 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -16,12 +16,12 @@
16#include "core/frontend/emu_window.h" 16#include "core/frontend/emu_window.h"
17#include "core/memory.h" 17#include "core/memory.h"
18#include "video_core/gpu.h" 18#include "video_core/gpu.h"
19#include "video_core/morton.h" 19#include "video_core/host_shaders/vulkan_present_frag_spv.h"
20#include "video_core/host_shaders/vulkan_present_vert_spv.h"
20#include "video_core/rasterizer_interface.h" 21#include "video_core/rasterizer_interface.h"
21#include "video_core/renderer_vulkan/renderer_vulkan.h" 22#include "video_core/renderer_vulkan/renderer_vulkan.h"
22#include "video_core/renderer_vulkan/vk_blit_screen.h" 23#include "video_core/renderer_vulkan/vk_blit_screen.h"
23#include "video_core/renderer_vulkan/vk_device.h" 24#include "video_core/renderer_vulkan/vk_device.h"
24#include "video_core/renderer_vulkan/vk_image.h"
25#include "video_core/renderer_vulkan/vk_master_semaphore.h" 25#include "video_core/renderer_vulkan/vk_master_semaphore.h"
26#include "video_core/renderer_vulkan/vk_memory_manager.h" 26#include "video_core/renderer_vulkan/vk_memory_manager.h"
27#include "video_core/renderer_vulkan/vk_scheduler.h" 27#include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -29,108 +29,12 @@
29#include "video_core/renderer_vulkan/vk_swapchain.h" 29#include "video_core/renderer_vulkan/vk_swapchain.h"
30#include "video_core/renderer_vulkan/wrapper.h" 30#include "video_core/renderer_vulkan/wrapper.h"
31#include "video_core/surface.h" 31#include "video_core/surface.h"
32#include "video_core/textures/decoders.h"
32 33
33namespace Vulkan { 34namespace Vulkan {
34 35
35namespace { 36namespace {
36 37
37// Generated from the "shaders/" directory, read the instructions there.
38constexpr u8 blit_vertex_code[] = {
39 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x27, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
-    0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x0f, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
-    0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
-    0x25, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00,
-    0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
-    0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-    0x48, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
-    0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x07, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x11, 0x00, 0x00, 0x00,
-    0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00,
-    0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x06, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
-    0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
-    0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
-    0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
-    0x0e, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x04, 0x00,
-    0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00,
-    0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
-    0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
-    0x13, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00,
-    0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00,
-    0x19, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00,
-    0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
-    0x03, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
-    0x24, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00,
-    0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
-    0x05, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
-    0x13, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
-    0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00,
-    0x1a, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x1d, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x50, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
-    0x1e, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x91, 0x00, 0x05, 0x00,
-    0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00,
-    0x41, 0x00, 0x05, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
-    0x0f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
-    0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
-    0x3e, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00,
-    0x38, 0x00, 0x01, 0x00};
-
-constexpr u8 blit_fragment_code[] = {
-    0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x14, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
-    0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x0f, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
-    0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00,
-    0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
-    0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00,
-    0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00,
-    0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00,
-    0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
-    0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00,
-    0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00,
-    0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x19, 0x00, 0x09, 0x00, 0x0a, 0x00, 0x00, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x03, 0x00,
-    0x0b, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
-    0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
-    0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
-    0x05, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00,
-    0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
-    0x11, 0x00, 0x00, 0x00, 0x57, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00,
-    0x0e, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00,
-    0x13, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
-
 struct ScreenRectVertex {
     ScreenRectVertex() = default;
     explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {}
@@ -173,9 +77,9 @@ constexpr std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) {
     // clang-format on
 }
 
-std::size_t GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) {
+u32 GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) {
     using namespace VideoCore::Surface;
-    return GetBytesPerPixel(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format));
+    return BytesPerBlock(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format));
 }
 
 std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) {
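The switch from the old GetBytesPerPixel lookup to BytesPerBlock is more than a rename: the rewritten texture cache does its size math in bytes per block, which equals bytes per pixel for linear formats but counts a whole compressed tile for BC/ASTC formats. A minimal sketch of why the block is the right unit for pitch math; the helper and its parameter names are illustrative, not part of the yuzu API:

    #include <cstdint>

    using u32 = std::uint32_t; // as in yuzu's common_types

    // Row pitch in bytes for a possibly block-compressed format.
    // block_width is 1 for linear formats and 4 for BC/ASTC 4x4 tiles.
    u32 RowPitchBytes(u32 width, u32 block_width, u32 bytes_per_block) {
        const u32 blocks_per_row = (width + block_width - 1) / block_width;
        return blocks_per_row * bytes_per_block;
    }

For the linear framebuffer formats this particular helper ever sees, one block is one pixel, so the returned value is the familiar 4 bytes for an 8-bit RGBA format.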
@@ -239,34 +143,30 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
     scheduler.Wait(resource_ticks[image_index]);
     resource_ticks[image_index] = scheduler.CurrentTick();
 
-    VKImage* blit_image = use_accelerated ? screen_info.image : raw_images[image_index].get();
-
-    UpdateDescriptorSet(image_index, blit_image->GetPresentView());
+    UpdateDescriptorSet(image_index,
+                        use_accelerated ? screen_info.image_view : *raw_image_views[image_index]);
 
     BufferData data;
     SetUniformData(data, framebuffer);
     SetVertexData(data, framebuffer);
 
     auto map = buffer_commit->Map();
-    std::memcpy(map.GetAddress(), &data, sizeof(data));
+    std::memcpy(map.Address(), &data, sizeof(data));
 
     if (!use_accelerated) {
         const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
 
-        const auto pixel_format =
-            VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
         const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
-        const auto host_ptr = cpu_memory.GetPointer(framebuffer_addr);
-        rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer));
+        const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
+        const size_t size_bytes = GetSizeInBytes(framebuffer);
+        rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes);
 
         // TODO(Rodrigo): Read this from HLE
         constexpr u32 block_height_log2 = 4;
-        VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format,
-                                 framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1,
-                                 map.GetAddress() + image_offset, host_ptr);
-
-        blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                               VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+        const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
+        Tegra::Texture::UnswizzleTexture(
+            std::span(map.Address() + image_offset, size_bytes), std::span(host_ptr, size_bytes),
+            bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
 
         const VkBufferImageCopy copy{
             .bufferOffset = image_offset,
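Swapping VideoCore::MortonSwizzle for Tegra::Texture::UnswizzleTexture also changes the calling convention: destination and source are std::span values, so the byte bounds travel with the pointers instead of being implied by format and stride, and the pixel size is passed explicitly rather than dispatched through a PixelFormat table. A sketch of that contract only, with deliberately trivial row-copy logic standing in for the real block-linear math:

    #include <algorithm>
    #include <cstring>
    #include <span>

    // Span-shaped deswizzle entry point: the callee can clamp or assert on
    // the sizes instead of trusting the caller's pointer arithmetic.
    void CopyRows(std::span<unsigned char> output, std::span<const unsigned char> input,
                  std::size_t row_bytes) {
        const std::size_t rows = std::min(output.size(), input.size()) / row_bytes;
        for (std::size_t y = 0; y < rows; ++y) {
            std::memcpy(&output[y * row_bytes], &input[y * row_bytes], row_bytes);
        }
    }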
@@ -288,15 +188,44 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
         },
     };
     scheduler.Record(
-        [buffer = *buffer, image = *blit_image->GetHandle(), copy](vk::CommandBuffer cmdbuf) {
-            cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy);
+        [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) {
+            const VkImageMemoryBarrier base_barrier{
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .pNext = nullptr,
+                .srcAccessMask = 0,
+                .dstAccessMask = 0,
+                .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = image,
+                .subresourceRange =
+                    {
+                        .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                        .baseMipLevel = 0,
+                        .levelCount = 1,
+                        .baseArrayLayer = 0,
+                        .layerCount = 1,
+                    },
+            };
+            VkImageMemoryBarrier read_barrier = base_barrier;
+            read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
+            read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+            read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+
+            VkImageMemoryBarrier write_barrier = base_barrier;
+            write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+            write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+
+            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                                   0, read_barrier);
+            cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);
+            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
+                                   VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
         });
     }
     map.Release();
 
-    blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
-                           VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
-
     scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index],
                       descriptor_set = descriptor_sets[image_index], buffer = *buffer,
                       size = swapchain.GetSize(), pipeline = *pipeline,
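With the VKImage wrapper gone there is no implicit layout tracking: the raw images stay permanently in VK_IMAGE_LAYOUT_GENERAL and the two hazards around the staging copy are spelled out by hand. The read_barrier (whose oldLayout of UNDEFINED tells the driver last frame's contents are disposable) makes the host write visible to the transfer stage; the write_barrier publishes the transfer write to the fragment shader that samples the image. The same sandwich in raw Vulkan, a sketch that assumes a recorded command buffer, an image in GENERAL layout, and an already-filled copy region:

    #include <vulkan/vulkan.h>

    void UploadWithBarriers(VkCommandBuffer cmd, VkBuffer staging, VkImage image,
                            const VkBufferImageCopy& copy) {
        VkImageMemoryBarrier barrier{};
        barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.image = image;
        barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};

        // Host wrote the staging memory; the transfer must wait for it.
        barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; // previous contents discarded
        barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
        vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
                             0, nullptr, 0, nullptr, 1, &barrier);

        vkCmdCopyBufferToImage(cmd, staging, image, VK_IMAGE_LAYOUT_GENERAL, 1, &copy);

        // Transfer wrote the image; the presentation fragment shader reads it.
        barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
        barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
        vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT,
                             VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1,
                             &barrier);
    }

Keeping every image in GENERAL trades theoretically optimal per-use layouts for state the cache never has to remember; the explicit barrier pair still gives the driver the execution and memory dependencies it needs.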
@@ -304,31 +233,31 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
         const VkClearValue clear_color{
             .color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}},
         };
-
-        VkRenderPassBeginInfo renderpass_bi;
-        renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
-        renderpass_bi.pNext = nullptr;
-        renderpass_bi.renderPass = renderpass;
-        renderpass_bi.framebuffer = framebuffer;
-        renderpass_bi.renderArea.offset.x = 0;
-        renderpass_bi.renderArea.offset.y = 0;
-        renderpass_bi.renderArea.extent = size;
-        renderpass_bi.clearValueCount = 1;
-        renderpass_bi.pClearValues = &clear_color;
-
-        VkViewport viewport;
-        viewport.x = 0.0f;
-        viewport.y = 0.0f;
-        viewport.width = static_cast<float>(size.width);
-        viewport.height = static_cast<float>(size.height);
-        viewport.minDepth = 0.0f;
-        viewport.maxDepth = 1.0f;
-
-        VkRect2D scissor;
-        scissor.offset.x = 0;
-        scissor.offset.y = 0;
-        scissor.extent = size;
-
+        const VkRenderPassBeginInfo renderpass_bi{
+            .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+            .pNext = nullptr,
+            .renderPass = renderpass,
+            .framebuffer = framebuffer,
+            .renderArea =
+                {
+                    .offset = {0, 0},
+                    .extent = size,
+                },
+            .clearValueCount = 1,
+            .pClearValues = &clear_color,
+        };
+        const VkViewport viewport{
+            .x = 0.0f,
+            .y = 0.0f,
+            .width = static_cast<float>(size.width),
+            .height = static_cast<float>(size.height),
+            .minDepth = 0.0f,
+            .maxDepth = 1.0f,
+        };
+        const VkRect2D scissor{
+            .offset = {0, 0},
+            .extent = size,
+        };
         cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
         cmdbuf.SetViewport(0, viewport);
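The conversion from assign-after-declare to C++20 designated initializers is a small but real hardening: fields not named in the initializer are value-initialized, whereas the old `VkViewport viewport;` pattern left anything forgotten as indeterminate garbage, and the structs can now be const. A short illustration with a hypothetical helper:

    #include <vulkan/vulkan.h>

    void Example(VkExtent2D size) {
        VkRect2D old_style;          // members indeterminate here
        old_style.offset.x = 0;
        old_style.offset.y = 0;      // forgetting .extent compiles silently

        const VkRect2D new_style{
            .offset = {0, 0},
            .extent = size,          // an omitted field would be zeroed instead
        };
        (void)old_style;
        (void)new_style;
    }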
@@ -372,8 +301,8 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer)
 }
 
 void VKBlitScreen::CreateShaders() {
-    vertex_shader = BuildShader(device, sizeof(blit_vertex_code), blit_vertex_code);
-    fragment_shader = BuildShader(device, sizeof(blit_fragment_code), blit_fragment_code);
+    vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV);
+    fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);
 }
 
 void VKBlitScreen::CreateSemaphores() {
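BuildShader now consumes SPIR-V from headers generated at build time out of the GLSL sources, replacing the hand-pasted byte arrays deleted above (the vk_compute_pass.cpp hunk further down adds the matching video_core/host_shaders includes). A hypothetical sketch of the shape such a generated header could take; only the two fixed preamble words shown are real SPIR-V, the rest is elided:

    // vulkan_present_vert_spv.h -- illustrative shape of a generated header;
    // the real file is emitted by the build system from the GLSL source.
    #pragma once
    #include <cstdint>

    constexpr std::uint32_t VULKAN_PRESENT_VERT_SPV[] = {
        0x07230203, // SPIR-V magic number
        0x00010000, // SPIR-V version 1.0
        // ...remaining words of the compiled module...
    };

Storing the module as u32 words rather than u8 bytes is what later lets VkShaderModuleCreateInfo::pCode consume it without a re-aligning copy.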
@@ -420,7 +349,7 @@ void VKBlitScreen::CreateRenderPass() {
 
     const VkAttachmentReference color_attachment_ref{
         .attachment = 0,
-        .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+        .layout = VK_IMAGE_LAYOUT_GENERAL,
     };
 
     const VkSubpassDescription subpass_description{
@@ -735,34 +664,56 @@ void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuff
 
 void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
     raw_images.resize(image_count);
+    raw_image_views.resize(image_count);
     raw_buffer_commits.resize(image_count);
 
-    const VkImageCreateInfo ci{
-        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
-        .pNext = nullptr,
-        .flags = 0,
-        .imageType = VK_IMAGE_TYPE_2D,
-        .format = GetFormat(framebuffer),
-        .extent =
-            {
-                .width = framebuffer.width,
-                .height = framebuffer.height,
-                .depth = 1,
-            },
-        .mipLevels = 1,
-        .arrayLayers = 1,
-        .samples = VK_SAMPLE_COUNT_1_BIT,
-        .tiling = VK_IMAGE_TILING_LINEAR,
-        .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
-        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
-        .queueFamilyIndexCount = 0,
-        .pQueueFamilyIndices = nullptr,
-        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
-    };
-
-    for (std::size_t i = 0; i < image_count; ++i) {
-        raw_images[i] = std::make_unique<VKImage>(device, scheduler, ci, VK_IMAGE_ASPECT_COLOR_BIT);
-        raw_buffer_commits[i] = memory_manager.Commit(raw_images[i]->GetHandle(), false);
+    for (size_t i = 0; i < image_count; ++i) {
+        raw_images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{
+            .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+            .pNext = nullptr,
+            .flags = 0,
+            .imageType = VK_IMAGE_TYPE_2D,
+            .format = GetFormat(framebuffer),
+            .extent =
+                {
+                    .width = framebuffer.width,
+                    .height = framebuffer.height,
+                    .depth = 1,
+                },
+            .mipLevels = 1,
+            .arrayLayers = 1,
+            .samples = VK_SAMPLE_COUNT_1_BIT,
+            .tiling = VK_IMAGE_TILING_LINEAR,
+            .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
+            .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+            .queueFamilyIndexCount = 0,
+            .pQueueFamilyIndices = nullptr,
+            .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+        });
+        raw_buffer_commits[i] = memory_manager.Commit(raw_images[i], false);
+        raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
+            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+            .pNext = nullptr,
+            .flags = 0,
+            .image = *raw_images[i],
+            .viewType = VK_IMAGE_VIEW_TYPE_2D,
+            .format = GetFormat(framebuffer),
+            .components =
+                {
+                    .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+                    .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+                    .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+                    .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+                },
+            .subresourceRange =
+                {
+                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                    .baseMipLevel = 0,
+                    .levelCount = 1,
+                    .baseArrayLayer = 0,
+                    .layerCount = 1,
+                },
+        });
     }
 }
 
@@ -789,7 +740,7 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag
789 const VkDescriptorImageInfo image_info{ 740 const VkDescriptorImageInfo image_info{
790 .sampler = *sampler, 741 .sampler = *sampler,
791 .imageView = image_view, 742 .imageView = image_view,
792 .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 743 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
793 }; 744 };
794 745
795 const VkWriteDescriptorSet sampler_write{ 746 const VkWriteDescriptorSet sampler_write{
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index 8f2839214..2ee374247 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -35,7 +35,6 @@ struct ScreenInfo;
 
 class RasterizerVulkan;
 class VKDevice;
-class VKImage;
 class VKScheduler;
 class VKSwapchain;
 
@@ -110,7 +109,8 @@ private:
     std::vector<u64> resource_ticks;
 
     std::vector<vk::Semaphore> semaphores;
-    std::vector<std::unique_ptr<VKImage>> raw_images;
+    std::vector<vk::Image> raw_images;
+    std::vector<vk::ImageView> raw_image_views;
     std::vector<VKMemoryCommit> raw_buffer_commits;
     u32 raw_width = 0;
     u32 raw_height = 0;
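Dropping std::unique_ptr<VKImage> for a plain vk::Image plus a separate vk::ImageView removes a whole abstraction layer: the wrapper existed chiefly to track layouts for Transition(), and with the layout pinned to GENERAL, a move-only RAII handle is all that is left to manage. A minimal sketch of that handle pattern, assuming an interface similar to yuzu's vk:: wrappers (the class name here is illustrative):

    #include <utility>
    #include <vulkan/vulkan.h>

    // Move-only owner of a VkImage; operator* exposes the raw handle, the
    // destructor frees it. No layout or usage state is remembered.
    class ImageHandle {
    public:
        ImageHandle() = default;
        ImageHandle(VkDevice device, VkImage handle) : device_{device}, handle_{handle} {}
        ImageHandle(const ImageHandle&) = delete;
        ImageHandle& operator=(const ImageHandle&) = delete;
        ImageHandle(ImageHandle&& rhs) noexcept
            : device_{rhs.device_}, handle_{std::exchange(rhs.handle_, VK_NULL_HANDLE)} {}
        ImageHandle& operator=(ImageHandle&& rhs) noexcept {
            std::swap(device_, rhs.device_);
            std::swap(handle_, rhs.handle_);
            return *this;
        }
        ~ImageHandle() {
            if (handle_ != VK_NULL_HANDLE) {
                vkDestroyImage(device_, handle_, nullptr);
            }
        }
        VkImage operator*() const { return handle_; }

    private:
        VkDevice device_ = VK_NULL_HANDLE;
        VkImage handle_ = VK_NULL_HANDLE;
    };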
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 444d3fb93..10d296c2f 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -31,15 +31,19 @@ constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS =
     VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
     VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT;
 
+constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS =
+    VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
+
 std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
-    return std::make_unique<VKStreamBuffer>(device, scheduler, BUFFER_USAGE);
+    return std::make_unique<VKStreamBuffer>(device, scheduler);
 }
 
 } // Anonymous namespace
 
-Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
+Buffer::Buffer(const VKDevice& device_, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
                VKStagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_)
-    : BufferBlock{cpu_addr_, size_}, scheduler{scheduler_}, staging_pool{staging_pool_} {
+    : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{
+                                                                                 staging_pool_} {
     const VkBufferCreateInfo ci{
         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
         .pNext = nullptr,
@@ -64,24 +68,39 @@ void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
     scheduler.RequestOutsideRenderPassOperationContext();
 
     const VkBuffer handle = Handle();
-    scheduler.Record(
-        [staging = *staging.handle, handle, offset, data_size](vk::CommandBuffer cmdbuf) {
-            cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size});
-
-            const VkBufferMemoryBarrier barrier{
-                .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
-                .pNext = nullptr,
-                .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
-                .dstAccessMask = UPLOAD_ACCESS_BARRIERS,
-                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .buffer = handle,
-                .offset = offset,
-                .size = data_size,
-            };
-            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
-                                   barrier, {});
-        });
+    scheduler.Record([staging = *staging.handle, handle, offset, data_size,
+                      &device = device](vk::CommandBuffer cmdbuf) {
+        const VkBufferMemoryBarrier read_barrier{
+            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask =
+                VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT |
+                VK_ACCESS_HOST_WRITE_BIT |
+                (device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0),
+            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .buffer = handle,
+            .offset = offset,
+            .size = data_size,
+        };
+        const VkBufferMemoryBarrier write_barrier{
+            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+            .dstAccessMask = UPLOAD_ACCESS_BARRIERS,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .buffer = handle,
+            .offset = offset,
+            .size = data_size,
+        };
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                               0, read_barrier);
+        cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size});
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0,
+                               write_barrier);
+    });
 }
 
 void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
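Upload previously issued a single barrier after the copy; it now brackets the copy with two. The read_barrier, waiting on VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, protects against the GPU still writing the destination range (shader stores, earlier transfers, host writes, and transform-feedback writes when the extension is present); the write_barrier then publishes the fresh contents to every consumer stage in UPLOAD_ACCESS_BARRIERS. A raw-Vulkan sketch of the first half of that sandwich, with the command buffer and buffers assumed to exist:

    #include <vulkan/vulkan.h>

    void GuardedCopy(VkCommandBuffer cmd, VkBuffer staging, VkBuffer dst,
                     VkDeviceSize offset, VkDeviceSize size) {
        // Wait for any in-flight GPU write to this range before overwriting it.
        const VkBufferMemoryBarrier before{
            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
            .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT |
                             VK_ACCESS_HOST_WRITE_BIT,
            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .buffer = dst,
            .offset = offset,
            .size = size,
        };
        vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                             VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 1, &before, 0, nullptr);
        const VkBufferCopy region{.srcOffset = 0, .dstOffset = offset, .size = size};
        vkCmdCopyBuffer(cmd, staging, dst, 1, &region);
        // A matching barrier (transfer write -> the consuming read accesses)
        // follows the copy, mirroring the one above with src/dst swapped.
    }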
@@ -150,8 +169,10 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
 VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_,
                              Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
                              const VKDevice& device_, VKMemoryManager& memory_manager_,
-                             VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_)
-    : BufferCache{rasterizer_, gpu_memory_, cpu_memory_, CreateStreamBuffer(device_, scheduler_)},
+                             VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_,
+                             VKStagingBufferPool& staging_pool_)
+    : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_,
+                                                                 cpu_memory_, stream_buffer_},
       device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{
                                                                                    staging_pool_} {}
 
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 6008b8373..daf498222 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -41,6 +41,7 @@ public:
     }
 
 private:
+    const VKDevice& device;
     VKScheduler& scheduler;
     VKStagingBufferPool& staging_pool;
 
@@ -49,10 +50,11 @@ private:
 
 class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
 public:
-    explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer_,
-                           Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                           const VKDevice& device_, VKMemoryManager& memory_manager_,
-                           VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_);
+    explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
+                           Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
+                           const VKDevice& device, VKMemoryManager& memory_manager,
+                           VKScheduler& scheduler, VKStreamBuffer& stream_buffer,
+                           VKStagingBufferPool& staging_pool);
     ~VKBufferCache();
 
     BufferInfo GetEmptyBuffer(std::size_t size) override;
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 1ac7e2a30..2c030e910 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -10,6 +10,9 @@
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/common_types.h"
+#include "video_core/host_shaders/vulkan_quad_array_comp_spv.h"
+#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
+#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
 #include "video_core/renderer_vulkan/vk_compute_pass.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_device.h"
@@ -22,99 +25,6 @@ namespace Vulkan {
 
 namespace {
 
-// Quad array SPIR-V module. Generated from the "shaders/" directory, read the instructions there.
-constexpr u8 quad_array[] = {
-    0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x54, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
-    0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
-    0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
-    0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
-    0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-    0x48, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
-    0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x48, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-    0x47, 0x00, 0x04, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
-    0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00,
-    0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x1e, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
-    0x15, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
-    0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
-    0x18, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00,
-    0x1b, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
-    0x3b, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
-    0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x20, 0x00, 0x04, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-    0x1c, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
-    0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-    0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
-    0x2c, 0x00, 0x09, 0x00, 0x34, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
-    0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00,
-    0x37, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
-    0x34, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x44, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
-    0x00, 0x04, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
-    0x49, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
-    0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
-    0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00,
-    0x3b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00,
-    0xf8, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x4b, 0x00, 0x00, 0x00,
-    0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00,
-    0xf8, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00,
-    0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
-    0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
-    0x17, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
-    0x19, 0x00, 0x00, 0x00, 0xae, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
-    0x12, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
-    0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
-    0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
-    0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf5, 0x00, 0x07, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
-    0x48, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00,
-    0x27, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00,
-    0x23, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00,
-    0x27, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
-    0x22, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
-    0x2b, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x2f, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x32, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
-    0x3e, 0x00, 0x03, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
-    0x07, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00,
-    0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
-    0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00,
-    0x3d, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
-    0x12, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x44, 0x00, 0x00, 0x00,
-    0x45, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
-    0x3e, 0x00, 0x03, 0x00, 0x45, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
-    0xf9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00,
-    0xf9, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4e, 0x00, 0x00, 0x00,
-    0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00,
-    0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
-};
-
 VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
     return {
         .binding = 0,
@@ -144,208 +54,6 @@ VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
     };
 }
 
-// Uint8 SPIR-V module. Generated from the "shaders/" directory.
-constexpr u8 uint8_pass[] = {
-    0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
-    0x51, 0x11, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x61, 0x11, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00,
-    0x53, 0x50, 0x56, 0x5f, 0x4b, 0x48, 0x52, 0x5f, 0x31, 0x36, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74,
-    0x6f, 0x72, 0x61, 0x67, 0x65, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, 0x53, 0x50, 0x56, 0x5f,
-    0x4b, 0x48, 0x52, 0x5f, 0x38, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65,
-    0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c,
-    0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
-    0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
-    0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00,
-    0x13, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00,
-    0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00,
-    0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1f, 0x00, 0x00, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00,
-    0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
-    0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
-    0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2e, 0x00, 0x00, 0x00,
-    0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
-    0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
-    0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00,
-    0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
-    0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
-    0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
-    0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
-    0x11, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
-    0x12, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00,
-    0x12, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-    0x13, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
-    0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
-    0x1e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
-    0x1f, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00,
-    0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-    0x20, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
-    0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-    0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-    0x1e, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00,
-    0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
-    0x2c, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
-    0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
-    0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
-    0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00,
-    0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00,
-    0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
-    0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00,
-    0x1a, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
-    0xf7, 0x00, 0x03, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00,
-    0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
-    0x1c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
-    0x08, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
-    0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00,
-    0x15, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
-    0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x71, 0x00, 0x04, 0x00,
-    0x1e, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
-    0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
-    0x24, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
-    0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00,
-    0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
-};
-
-// Quad indexed SPIR-V module. Generated from the "shaders/" directory.
-constexpr u8 QUAD_INDEXED_SPV[] = {
-    0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x7c, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
-    0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
-    0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
-    0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x47, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
-    0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
-    0x48, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
-    0x48, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
-    0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
-    0x23, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00,
-    0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00,
-    0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x72, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
-    0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00,
-    0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
-    0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
-    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00,
-    0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
-    0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
-    0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00,
-    0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
-    0x20, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
-    0x3b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-    0x14, 0x00, 0x02, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x21, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
-    0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
-    0x09, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
-    0x24, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00,
-    0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x2b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
-    0x3b, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x3f, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x41, 0x00, 0x00, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x43, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x09, 0x00, 0x41, 0x00, 0x00, 0x00,
-    0x44, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00,
-    0x42, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
-    0x46, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
-    0x56, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00,
-    0x56, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-    0x57, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00,
-    0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x5b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
-    0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
-    0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,
-    0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00,
-    0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00,
-    0x70, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
-    0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
-    0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x46, 0x00, 0x00, 0x00,
-    0x47, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00,
-    0xf8, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x73, 0x00, 0x00, 0x00,
-    0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00,
-    0xf8, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0e, 0x00, 0x00, 0x00,
-    0x0f, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
-    0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00,
-    0x44, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
-    0x19, 0x00, 0x00, 0x00, 0xaf, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
-    0x14, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
-    0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
-    0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
-    0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
-    0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00,
-    0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
-    0x28, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
-    0x2b, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
-    0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00,
-    0xf5, 0x00, 0x07, 0x00, 0x09, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
-    0x1e, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00,
-    0x1b, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00,
-    0xf6, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0xfa, 0x00, 0x04, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00,
-    0xf8, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x40, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00,
-    0x47, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00,
-    0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
-    0x06, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
-    0xc3, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
-    0x2e, 0x00, 0x00, 0x00, 0xc7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00,
-    0x4a, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
-    0x54, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
-    0x5b, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
-    0x4e, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x5d, 0x00, 0x00, 0x00,
-    0x5c, 0x00, 0x00, 0x00, 0xcb, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00,
-    0x5d, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
-    0x09, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
-    0x09, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00,
-    0x41, 0x00, 0x05, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
-    0x42, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00,
-    0x6a, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
-    0x62, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x5b, 0x00, 0x00, 0x00,
-    0x6d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00,
-    0x3e, 0x00, 0x03, 0x00, 0x6d, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
-    0x09, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
-    0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00,
-    0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00,
-    0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00,
-    0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
-};
-
 std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() {
     return {{
         {
@@ -381,8 +89,8 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
 VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
                              vk::Span<VkDescriptorSetLayoutBinding> bindings,
                              vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
-                             vk::Span<VkPushConstantRange> push_constants, std::size_t code_size,
-                             const u8* code) {
+                             vk::Span<VkPushConstantRange> push_constants,
+                             std::span<const u32> code) {
     descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
         .pNext = nullptr,
@@ -390,7 +98,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
390 .bindingCount = bindings.size(), 98 .bindingCount = bindings.size(),
391 .pBindings = bindings.data(), 99 .pBindings = bindings.data(),
392 }); 100 });
393
394 layout = device.GetLogical().CreatePipelineLayout({ 101 layout = device.GetLogical().CreatePipelineLayout({
395 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 102 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
396 .pNext = nullptr, 103 .pNext = nullptr,
@@ -400,7 +107,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
400 .pushConstantRangeCount = push_constants.size(), 107 .pushConstantRangeCount = push_constants.size(),
401 .pPushConstantRanges = push_constants.data(), 108 .pPushConstantRanges = push_constants.data(),
402 }); 109 });
403
404 if (!templates.empty()) { 110 if (!templates.empty()) {
405 descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({ 111 descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({
406 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, 112 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
@@ -417,18 +123,13 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
417 123
418 descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); 124 descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout);
419 } 125 }
420
421 auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1);
422 std::memcpy(code_copy.get(), code, code_size);
423
424 module = device.GetLogical().CreateShaderModule({ 126 module = device.GetLogical().CreateShaderModule({
425 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, 127 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
426 .pNext = nullptr, 128 .pNext = nullptr,
427 .flags = 0, 129 .flags = 0,
428 .codeSize = code_size, 130 .codeSize = static_cast<u32>(code.size_bytes()),
429 .pCode = code_copy.get(), 131 .pCode = code.data(),
430 }); 132 });
431
432 pipeline = device.GetLogical().CreateComputePipeline({ 133 pipeline = device.GetLogical().CreateComputePipeline({
433 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 134 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
434 .pNext = nullptr, 135 .pNext = nullptr,
@@ -467,7 +168,7 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_,
467 VKUpdateDescriptorQueue& update_descriptor_queue_) 168 VKUpdateDescriptorQueue& update_descriptor_queue_)
468 : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(), 169 : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(),
469 BuildQuadArrayPassDescriptorUpdateTemplateEntry(), 170 BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
470 BuildComputePushConstantRange(sizeof(u32)), std::size(quad_array), quad_array), 171 BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV),
471 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, 172 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
472 update_descriptor_queue{update_descriptor_queue_} {} 173 update_descriptor_queue{update_descriptor_queue_} {}
473 174
@@ -510,12 +211,11 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32
510 return {*buffer.handle, 0}; 211 return {*buffer.handle, 0};
511} 212}
512 213
513Uint8Pass::Uint8Pass(const VKDevice& device_, VKScheduler& scheduler_, 214Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler_,
514 VKDescriptorPool& descriptor_pool_, VKStagingBufferPool& staging_buffer_pool_, 215 VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool_,
515 VKUpdateDescriptorQueue& update_descriptor_queue_) 216 VKUpdateDescriptorQueue& update_descriptor_queue_)
516 : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), 217 : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(),
517 BuildInputOutputDescriptorUpdateTemplate(), {}, std::size(uint8_pass), 218 BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV),
518 uint8_pass),
519 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, 219 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
520 update_descriptor_queue{update_descriptor_queue_} {} 220 update_descriptor_queue{update_descriptor_queue_} {}
521 221
@@ -561,8 +261,7 @@ QuadIndexedPass::QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler
561 VKUpdateDescriptorQueue& update_descriptor_queue_) 261 VKUpdateDescriptorQueue& update_descriptor_queue_)
562 : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), 262 : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(),
563 BuildInputOutputDescriptorUpdateTemplate(), 263 BuildInputOutputDescriptorUpdateTemplate(),
564 BuildComputePushConstantRange(sizeof(u32) * 2), std::size(QUAD_INDEXED_SPV), 264 BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV),
565 QUAD_INDEXED_SPV),
566 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, 265 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
567 update_descriptor_queue{update_descriptor_queue_} {} 266 update_descriptor_queue{update_descriptor_queue_} {}
568 267
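The span-based constructor above also removes the manual aligned copy (the deleted code_copy buffer): a std::span<const u32> is word-aligned by construction, so its data pointer can feed pCode directly. A minimal sketch of the pattern against the same vk::Device wrapper this diff uses; the SPV array is a hypothetical stand-in for the generated *_COMP_SPV arrays referenced above:

    // Hypothetical SPIR-V words; the real arrays are the generated VULKAN_*_COMP_SPV headers.
    constexpr std::array<u32, 5> SPV{0x07230203, 0x00010000, 0x00000000, 0x00000001, 0x0000002c};

    vk::ShaderModule MakeShaderModule(const vk::Device& logical, std::span<const u32> code) {
        return logical.CreateShaderModule({
            .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
            .pNext = nullptr,
            .flags = 0,
            .codeSize = static_cast<u32>(code.size_bytes()), // Vulkan expects a byte count
            .pCode = code.data(),                            // u32-aligned, no copy needed
        });
    }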
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 2dc87902c..abdf61e2c 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <optional> 7#include <optional>
8#include <span>
8#include <utility> 9#include <utility>
9 10
10#include "common/common_types.h" 11#include "common/common_types.h"
@@ -24,8 +25,7 @@ public:
24 explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, 25 explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
25 vk::Span<VkDescriptorSetLayoutBinding> bindings, 26 vk::Span<VkDescriptorSetLayoutBinding> bindings,
26 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, 27 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
27 vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, 28 vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code);
28 const u8* code);
29 ~VKComputePass(); 29 ~VKComputePass();
30 30
31protected: 31protected:
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index ce3846195..370a63f74 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -46,6 +46,7 @@ constexpr std::array REQUIRED_EXTENSIONS{
46 VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, 46 VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,
47 VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, 47 VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
48 VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, 48 VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
49 VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
49 VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, 50 VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
50 VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, 51 VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
51 VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, 52 VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
@@ -122,6 +123,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
122 VK_FORMAT_R16G16_UNORM, 123 VK_FORMAT_R16G16_UNORM,
123 VK_FORMAT_R16G16_SNORM, 124 VK_FORMAT_R16G16_SNORM,
124 VK_FORMAT_R16G16_SFLOAT, 125 VK_FORMAT_R16G16_SFLOAT,
126 VK_FORMAT_R16G16_SINT,
125 VK_FORMAT_R16_UNORM, 127 VK_FORMAT_R16_UNORM,
126 VK_FORMAT_R16_UINT, 128 VK_FORMAT_R16_UINT,
127 VK_FORMAT_R8G8B8A8_SRGB, 129 VK_FORMAT_R8G8B8A8_SRGB,
@@ -161,18 +163,32 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
161 VK_FORMAT_BC2_SRGB_BLOCK, 163 VK_FORMAT_BC2_SRGB_BLOCK,
162 VK_FORMAT_BC3_SRGB_BLOCK, 164 VK_FORMAT_BC3_SRGB_BLOCK,
163 VK_FORMAT_BC7_SRGB_BLOCK, 165 VK_FORMAT_BC7_SRGB_BLOCK,
166 VK_FORMAT_ASTC_4x4_UNORM_BLOCK,
164 VK_FORMAT_ASTC_4x4_SRGB_BLOCK, 167 VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
165 VK_FORMAT_ASTC_8x8_SRGB_BLOCK, 168 VK_FORMAT_ASTC_5x4_UNORM_BLOCK,
166 VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
167 VK_FORMAT_ASTC_5x4_SRGB_BLOCK, 169 VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
168 VK_FORMAT_ASTC_5x5_UNORM_BLOCK, 170 VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
169 VK_FORMAT_ASTC_5x5_SRGB_BLOCK, 171 VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
170 VK_FORMAT_ASTC_10x8_UNORM_BLOCK, 172 VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
171 VK_FORMAT_ASTC_10x8_SRGB_BLOCK, 173 VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
172 VK_FORMAT_ASTC_6x6_UNORM_BLOCK, 174 VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
173 VK_FORMAT_ASTC_6x6_SRGB_BLOCK, 175 VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
176 VK_FORMAT_ASTC_8x5_UNORM_BLOCK,
177 VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
178 VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
179 VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
180 VK_FORMAT_ASTC_8x8_UNORM_BLOCK,
181 VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
182 VK_FORMAT_ASTC_10x5_UNORM_BLOCK,
183 VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
184 VK_FORMAT_ASTC_10x6_UNORM_BLOCK,
185 VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
186 VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
187 VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
174 VK_FORMAT_ASTC_10x10_UNORM_BLOCK, 188 VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
175 VK_FORMAT_ASTC_10x10_SRGB_BLOCK, 189 VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
190 VK_FORMAT_ASTC_12x10_UNORM_BLOCK,
191 VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
176 VK_FORMAT_ASTC_12x12_UNORM_BLOCK, 192 VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
177 VK_FORMAT_ASTC_12x12_SRGB_BLOCK, 193 VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
178 VK_FORMAT_ASTC_8x6_UNORM_BLOCK, 194 VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
@@ -192,7 +208,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
192 208
193VKDevice::VKDevice(VkInstance instance_, u32 instance_version_, vk::PhysicalDevice physical_, 209VKDevice::VKDevice(VkInstance instance_, u32 instance_version_, vk::PhysicalDevice physical_,
194 VkSurfaceKHR surface, const vk::InstanceDispatch& dld_) 210 VkSurfaceKHR surface, const vk::InstanceDispatch& dld_)
195 : dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, 211 : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
196 instance_version{instance_version_}, format_properties{GetFormatProperties(physical, dld)} { 212 instance_version{instance_version_}, format_properties{GetFormatProperties(physical, dld)} {
197 SetupFamilies(surface); 213 SetupFamilies(surface);
198 SetupFeatures(); 214 SetupFeatures();
@@ -214,7 +230,7 @@ bool VKDevice::Create() {
214 features2.features = { 230 features2.features = {
215 .robustBufferAccess = false, 231 .robustBufferAccess = false,
216 .fullDrawIndexUint32 = false, 232 .fullDrawIndexUint32 = false,
217 .imageCubeArray = false, 233 .imageCubeArray = true,
218 .independentBlend = true, 234 .independentBlend = true,
219 .geometryShader = true, 235 .geometryShader = true,
220 .tessellationShader = true, 236 .tessellationShader = true,
@@ -242,7 +258,7 @@ bool VKDevice::Create() {
242 .shaderTessellationAndGeometryPointSize = false, 258 .shaderTessellationAndGeometryPointSize = false,
243 .shaderImageGatherExtended = true, 259 .shaderImageGatherExtended = true,
244 .shaderStorageImageExtendedFormats = false, 260 .shaderStorageImageExtendedFormats = false,
245 .shaderStorageImageMultisample = false, 261 .shaderStorageImageMultisample = true,
246 .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported, 262 .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
247 .shaderStorageImageWriteWithoutFormat = true, 263 .shaderStorageImageWriteWithoutFormat = true,
248 .shaderUniformBufferArrayDynamicIndexing = false, 264 .shaderUniformBufferArrayDynamicIndexing = false,
@@ -268,7 +284,6 @@ bool VKDevice::Create() {
268 .variableMultisampleRate = false, 284 .variableMultisampleRate = false,
269 .inheritedQueries = false, 285 .inheritedQueries = false,
270 }; 286 };
271
272 VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{ 287 VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{
273 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR, 288 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR,
274 .pNext = nullptr, 289 .pNext = nullptr,
@@ -380,6 +395,20 @@ bool VKDevice::Create() {
380 LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); 395 LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
381 } 396 }
382 397
398 VkPhysicalDeviceRobustness2FeaturesEXT robustness2;
399 if (ext_robustness2) {
400 robustness2 = {
401 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT,
402 .pNext = nullptr,
403 .robustBufferAccess2 = false,
404 .robustImageAccess2 = true,
405 .nullDescriptor = true,
406 };
407 SetNext(next, robustness2);
408 } else {
409 LOG_INFO(Render_Vulkan, "Device doesn't support robustness2");
410 }
411
383 if (!ext_depth_range_unrestricted) { 412 if (!ext_depth_range_unrestricted) {
384 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); 413 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
385 } 414 }
@@ -405,7 +434,14 @@ bool VKDevice::Create() {
405 } 434 }
406 435
407 CollectTelemetryParameters(); 436 CollectTelemetryParameters();
437 CollectToolingInfo();
408 438
439 if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) {
440 LOG_WARNING(
441 Render_Vulkan,
442             "Blacklisting RADV for VK_EXT_extended_dynamic_state, likely due to a bug in yuzu");
443 ext_extended_dynamic_state = false;
444 }
409 if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) { 445 if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) {
410 // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it 446 // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it
411 // seems to cause stability issues 447 // seems to cause stability issues
@@ -458,7 +494,7 @@ void VKDevice::ReportLoss() const {
458 LOG_CRITICAL(Render_Vulkan, "Device loss occurred!"); 494 LOG_CRITICAL(Render_Vulkan, "Device loss occurred!");
459 495
460 // Wait for the log to flush and for Nsight Aftermath to dump the results 496 // Wait for the log to flush and for Nsight Aftermath to dump the results
461 std::this_thread::sleep_for(std::chrono::seconds{3}); 497 std::this_thread::sleep_for(std::chrono::seconds{15});
462} 498}
463 499
464void VKDevice::SaveShader(const std::vector<u32>& spirv) const { 500void VKDevice::SaveShader(const std::vector<u32>& spirv) const {
@@ -499,6 +535,16 @@ bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features)
499 return true; 535 return true;
500} 536}
501 537
538bool VKDevice::TestDepthStencilBlits() const {
539 static constexpr VkFormatFeatureFlags required_features =
540 VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
541 const auto test_features = [](VkFormatProperties props) {
542 return (props.optimalTilingFeatures & required_features) == required_features;
543 };
544 return test_features(format_properties.at(VK_FORMAT_D32_SFLOAT_S8_UINT)) &&
545 test_features(format_properties.at(VK_FORMAT_D24_UNORM_S8_UINT));
546}
547
502bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, 548bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
503 FormatType format_type) const { 549 FormatType format_type) const {
504 const auto it = format_properties.find(wanted_format); 550 const auto it = format_properties.find(wanted_format);
@@ -569,6 +615,7 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) {
569 const auto features{physical.GetFeatures()}; 615 const auto features{physical.GetFeatures()};
570 const std::array feature_report = { 616 const std::array feature_report = {
571 std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), 617 std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
618 std::make_pair(features.imageCubeArray, "imageCubeArray"),
572 std::make_pair(features.independentBlend, "independentBlend"), 619 std::make_pair(features.independentBlend, "independentBlend"),
573 std::make_pair(features.depthClamp, "depthClamp"), 620 std::make_pair(features.depthClamp, "depthClamp"),
574 std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"), 621 std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"),
@@ -580,6 +627,7 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) {
580 std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), 627 std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
581 std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), 628 std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
582 std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), 629 std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
630 std::make_pair(features.shaderStorageImageMultisample, "shaderStorageImageMultisample"),
583 std::make_pair(features.shaderStorageImageWriteWithoutFormat, 631 std::make_pair(features.shaderStorageImageWriteWithoutFormat,
584 "shaderStorageImageWriteWithoutFormat"), 632 "shaderStorageImageWriteWithoutFormat"),
585 }; 633 };
@@ -608,6 +656,7 @@ std::vector<const char*> VKDevice::LoadExtensions() {
608 bool has_ext_transform_feedback{}; 656 bool has_ext_transform_feedback{};
609 bool has_ext_custom_border_color{}; 657 bool has_ext_custom_border_color{};
610 bool has_ext_extended_dynamic_state{}; 658 bool has_ext_extended_dynamic_state{};
659 bool has_ext_robustness2{};
611 for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { 660 for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) {
612 const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, 661 const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
613 bool push) { 662 bool push) {
@@ -627,11 +676,15 @@ std::vector<const char*> VKDevice::LoadExtensions() {
627 test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); 676 test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
628 test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); 677 test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
629 test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); 678 test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
679 test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true);
630 test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, 680 test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME,
631 true); 681 true);
682 test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true);
683 test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true);
632 test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); 684 test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false);
633 test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); 685 test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
634 test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); 686 test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
687 test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false);
635 if (instance_version >= VK_API_VERSION_1_1) { 688 if (instance_version >= VK_API_VERSION_1_1) {
636 test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); 689 test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
637 } 690 }
@@ -733,6 +786,18 @@ std::vector<const char*> VKDevice::LoadExtensions() {
733 } 786 }
734 } 787 }
735 788
789 if (has_ext_robustness2) {
790 VkPhysicalDeviceRobustness2FeaturesEXT robustness2;
791 robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
792 robustness2.pNext = nullptr;
793 features.pNext = &robustness2;
794 physical.GetFeatures2KHR(features);
795 if (robustness2.nullDescriptor && robustness2.robustImageAccess2) {
796 extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
797 ext_robustness2 = true;
798 }
799 }
800
736 return extensions; 801 return extensions;
737} 802}
738 803
@@ -764,6 +829,7 @@ void VKDevice::SetupFamilies(VkSurfaceKHR surface) {
764void VKDevice::SetupFeatures() { 829void VKDevice::SetupFeatures() {
765 const auto supported_features{physical.GetFeatures()}; 830 const auto supported_features{physical.GetFeatures()};
766 is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; 831 is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
832 is_blit_depth_stencil_supported = TestDepthStencilBlits();
767 is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); 833 is_optimal_astc_supported = IsOptimalAstcSupported(supported_features);
768} 834}
769 835
@@ -794,6 +860,32 @@ void VKDevice::CollectTelemetryParameters() {
794 } 860 }
795} 861}
796 862
863void VKDevice::CollectToolingInfo() {
864 if (!ext_tooling_info) {
865 return;
866 }
867 const auto vkGetPhysicalDeviceToolPropertiesEXT =
868 reinterpret_cast<PFN_vkGetPhysicalDeviceToolPropertiesEXT>(
869 dld.vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceToolPropertiesEXT"));
870 if (!vkGetPhysicalDeviceToolPropertiesEXT) {
871 return;
872 }
873 u32 tool_count = 0;
874 if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, nullptr) != VK_SUCCESS) {
875 return;
876 }
877 std::vector<VkPhysicalDeviceToolPropertiesEXT> tools(tool_count);
878 if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, tools.data()) != VK_SUCCESS) {
879 return;
880 }
881 for (const VkPhysicalDeviceToolPropertiesEXT& tool : tools) {
882 const std::string_view name = tool.name;
883 LOG_INFO(Render_Vulkan, "{}", name);
884 has_renderdoc = has_renderdoc || name == "RenderDoc";
885 has_nsight_graphics = has_nsight_graphics || name == "NVIDIA Nsight Graphics";
886 }
887}
888
797std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { 889std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
798 static constexpr float QUEUE_PRIORITY = 1.0f; 890 static constexpr float QUEUE_PRIORITY = 1.0f;
799 891
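The robustness2 hunks above follow Vulkan's two-phase feature negotiation: chain the feature struct behind VkPhysicalDeviceFeatures2 to ask the driver what it supports, then hand the same struct to the device-create pNext chain (via SetNext) to enable it. A sketch of the query half with the core 1.1 entry point (the diff uses the KHR alias), assuming a valid physical_device handle:

    VkPhysicalDeviceRobustness2FeaturesEXT robustness2{
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT,
        .pNext = nullptr,
    };
    VkPhysicalDeviceFeatures2 features2{
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
        .pNext = &robustness2, // the driver fills every struct linked through pNext
    };
    vkGetPhysicalDeviceFeatures2(physical_device, &features2);
    // Enable VK_EXT_robustness2 only when the members this renderer needs are reported.
    const bool usable = robustness2.nullDescriptor && robustness2.robustImageAccess2;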
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 4286673d9..995dcfc0f 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -157,6 +157,11 @@ public:
157 return is_formatless_image_load_supported; 157 return is_formatless_image_load_supported;
158 } 158 }
159 159
160 /// Returns true when blitting from and to depth stencil images is supported.
161 bool IsBlitDepthStencilSupported() const {
162 return is_blit_depth_stencil_supported;
163 }
164
160 /// Returns true if the device supports VK_NV_viewport_swizzle. 165 /// Returns true if the device supports VK_NV_viewport_swizzle.
161 bool IsNvViewportSwizzleSupported() const { 166 bool IsNvViewportSwizzleSupported() const {
162 return nv_viewport_swizzle; 167 return nv_viewport_swizzle;
@@ -172,6 +177,11 @@ public:
172 return ext_index_type_uint8; 177 return ext_index_type_uint8;
173 } 178 }
174 179
180 /// Returns true if the device supports VK_EXT_sampler_filter_minmax.
181 bool IsExtSamplerFilterMinmaxSupported() const {
182 return ext_sampler_filter_minmax;
183 }
184
175 /// Returns true if the device supports VK_EXT_depth_range_unrestricted. 185 /// Returns true if the device supports VK_EXT_depth_range_unrestricted.
176 bool IsExtDepthRangeUnrestrictedSupported() const { 186 bool IsExtDepthRangeUnrestrictedSupported() const {
177 return ext_depth_range_unrestricted; 187 return ext_depth_range_unrestricted;
@@ -197,6 +207,16 @@ public:
197 return ext_extended_dynamic_state; 207 return ext_extended_dynamic_state;
198 } 208 }
199 209
210 /// Returns true if the device supports VK_EXT_shader_stencil_export.
211 bool IsExtShaderStencilExportSupported() const {
212 return ext_shader_stencil_export;
213 }
214
215 /// Returns true when a known debugging tool is attached.
216 bool HasDebuggingToolAttached() const {
217 return has_renderdoc || has_nsight_graphics;
218 }
219
200 /// Returns the vendor name reported from Vulkan. 220 /// Returns the vendor name reported from Vulkan.
201 std::string_view GetVendorName() const { 221 std::string_view GetVendorName() const {
202 return vendor_name; 222 return vendor_name;
@@ -228,16 +248,23 @@ private:
228 /// Collects telemetry information from the device. 248 /// Collects telemetry information from the device.
229 void CollectTelemetryParameters(); 249 void CollectTelemetryParameters();
230 250
251 /// Collects information about attached tools.
252 void CollectToolingInfo();
253
231 /// Returns a list of queue initialization descriptors. 254 /// Returns a list of queue initialization descriptors.
232 std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; 255 std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
233 256
234 /// Returns true if ASTC textures are natively supported. 257 /// Returns true if ASTC textures are natively supported.
235 bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const; 258 bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const;
236 259
260 /// Returns true if the device natively supports blitting depth stencil images.
261 bool TestDepthStencilBlits() const;
262
237 /// Returns true if a format is supported. 263 /// Returns true if a format is supported.
238 bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, 264 bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
239 FormatType format_type) const; 265 FormatType format_type) const;
240 266
267 VkInstance instance; ///< Vulkan instance.
241 vk::DeviceDispatch dld; ///< Device function pointers. 268 vk::DeviceDispatch dld; ///< Device function pointers.
242 vk::PhysicalDevice physical; ///< Physical device. 269 vk::PhysicalDevice physical; ///< Physical device.
243 VkPhysicalDeviceProperties properties; ///< Device properties. 270 VkPhysicalDeviceProperties properties; ///< Device properties.
@@ -253,15 +280,22 @@ private:
253 bool is_float16_supported{}; ///< Support for float16 arithmetics. 280 bool is_float16_supported{}; ///< Support for float16 arithmetics.
254 bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. 281 bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
255 bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. 282 bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
283 bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil.
256 bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle. 284 bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
257 bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. 285 bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
258 bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. 286 bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
287 bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
259 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. 288 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
260 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. 289 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
290 bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.
261 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. 291 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
262 bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. 292 bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
263 bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. 293 bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
294 bool ext_robustness2{}; ///< Support for VK_EXT_robustness2.
295 bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
264 bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. 296 bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
297 bool has_renderdoc{}; ///< Has RenderDoc attached
298 bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
265 299
266 // Asynchronous Graphics Pipeline setting 300 // Asynchronous Graphics Pipeline setting
267 bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline 301 bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index 0bcaee714..774a12a53 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -73,10 +73,9 @@ bool InnerFence::IsEventSignalled() const {
73} 73}
74 74
75VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, 75VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
76 Tegra::MemoryManager& memory_manager_, 76 Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
77 VKTextureCache& texture_cache_, VKBufferCache& buffer_cache_, 77 VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
78 VKQueryCache& query_cache_, const VKDevice& device_, 78 const VKDevice& device_, VKScheduler& scheduler_)
79 VKScheduler& scheduler_)
80 : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, 79 : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
81 device{device_}, scheduler{scheduler_} {} 80 device{device_}, scheduler{scheduler_} {}
82 81
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index c8547cc24..c2869e8e3 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -8,6 +8,7 @@
8 8
9#include "video_core/fence_manager.h" 9#include "video_core/fence_manager.h"
10#include "video_core/renderer_vulkan/vk_buffer_cache.h" 10#include "video_core/renderer_vulkan/vk_buffer_cache.h"
11#include "video_core/renderer_vulkan/vk_texture_cache.h"
11#include "video_core/renderer_vulkan/wrapper.h" 12#include "video_core/renderer_vulkan/wrapper.h"
12 13
13namespace Core { 14namespace Core {
@@ -24,7 +25,6 @@ class VKBufferCache;
24class VKDevice; 25class VKDevice;
25class VKQueryCache; 26class VKQueryCache;
26class VKScheduler; 27class VKScheduler;
27class VKTextureCache;
28 28
29class InnerFence : public VideoCommon::FenceBase { 29class InnerFence : public VideoCommon::FenceBase {
30public: 30public:
@@ -51,12 +51,12 @@ private:
51using Fence = std::shared_ptr<InnerFence>; 51using Fence = std::shared_ptr<InnerFence>;
52 52
53using GenericFenceManager = 53using GenericFenceManager =
54 VideoCommon::FenceManager<Fence, VKTextureCache, VKBufferCache, VKQueryCache>; 54 VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>;
55 55
56class VKFenceManager final : public GenericFenceManager { 56class VKFenceManager final : public GenericFenceManager {
57public: 57public:
58 explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, 58 explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
59 Tegra::MemoryManager& memory_manager_, VKTextureCache& texture_cache_, 59 Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
60 VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, 60 VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
61 const VKDevice& device_, VKScheduler& scheduler_); 61 const VKDevice& device_, VKScheduler& scheduler_);
62 62
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 970979fa1..7979df3a8 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -15,7 +15,6 @@
15#include "video_core/renderer_vulkan/vk_device.h" 15#include "video_core/renderer_vulkan/vk_device.h"
16#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 16#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
17#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 17#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
18#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
19#include "video_core/renderer_vulkan/vk_scheduler.h" 18#include "video_core/renderer_vulkan/vk_scheduler.h"
20#include "video_core/renderer_vulkan/vk_update_descriptor.h" 19#include "video_core/renderer_vulkan/vk_update_descriptor.h"
21#include "video_core/renderer_vulkan/wrapper.h" 20#include "video_core/renderer_vulkan/wrapper.h"
@@ -69,23 +68,45 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
69 }; 68 };
70} 69}
71 70
71VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) {
72 switch (msaa_mode) {
73 case Tegra::Texture::MsaaMode::Msaa1x1:
74 return VK_SAMPLE_COUNT_1_BIT;
75 case Tegra::Texture::MsaaMode::Msaa2x1:
76 case Tegra::Texture::MsaaMode::Msaa2x1_D3D:
77 return VK_SAMPLE_COUNT_2_BIT;
78 case Tegra::Texture::MsaaMode::Msaa2x2:
79 case Tegra::Texture::MsaaMode::Msaa2x2_VC4:
80 case Tegra::Texture::MsaaMode::Msaa2x2_VC12:
81 return VK_SAMPLE_COUNT_4_BIT;
82 case Tegra::Texture::MsaaMode::Msaa4x2:
83 case Tegra::Texture::MsaaMode::Msaa4x2_D3D:
84 case Tegra::Texture::MsaaMode::Msaa4x2_VC8:
85 case Tegra::Texture::MsaaMode::Msaa4x2_VC24:
86 return VK_SAMPLE_COUNT_8_BIT;
87 case Tegra::Texture::MsaaMode::Msaa4x4:
88 return VK_SAMPLE_COUNT_16_BIT;
89 default:
90 UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
91 return VK_SAMPLE_COUNT_1_BIT;
92 }
93}
94
72} // Anonymous namespace 95} // Anonymous namespace
73 96
74VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, 97VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_,
75 VKDescriptorPool& descriptor_pool_, 98 VKDescriptorPool& descriptor_pool_,
76 VKUpdateDescriptorQueue& update_descriptor_queue_, 99 VKUpdateDescriptorQueue& update_descriptor_queue_,
77 VKRenderPassCache& renderpass_cache_, 100 const GraphicsPipelineCacheKey& key,
78 const GraphicsPipelineCacheKey& key_, 101 vk::Span<VkDescriptorSetLayoutBinding> bindings,
79 vk::Span<VkDescriptorSetLayoutBinding> bindings_, 102 const SPIRVProgram& program, u32 num_color_buffers)
80 const SPIRVProgram& program_) 103 : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()},
81 : device{device_}, scheduler{scheduler_}, cache_key{key_}, hash{cache_key.Hash()}, 104 descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
82 descriptor_set_layout{CreateDescriptorSetLayout(bindings_)},
83 descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, 105 descriptor_allocator{descriptor_pool_, *descriptor_set_layout},
84 update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, 106 update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()},
85 descriptor_template{CreateDescriptorUpdateTemplate(program_)}, modules{CreateShaderModules( 107 descriptor_template{CreateDescriptorUpdateTemplate(program)},
86 program_)}, 108 modules(CreateShaderModules(program)),
87 renderpass{renderpass_cache_.GetRenderPass(cache_key.renderpass_params)}, 109 pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {}
88 pipeline{CreatePipeline(cache_key.renderpass_params, program_)} {}
89 110
90VKGraphicsPipeline::~VKGraphicsPipeline() = default; 111VKGraphicsPipeline::~VKGraphicsPipeline() = default;
91 112
@@ -179,8 +200,9 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
179 return shader_modules; 200 return shader_modules;
180} 201}
181 202
182vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, 203vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
183 const SPIRVProgram& program) const { 204 VkRenderPass renderpass,
205 u32 num_color_buffers) const {
184 const auto& state = cache_key.fixed_state; 206 const auto& state = cache_key.fixed_state;
185 const auto& viewport_swizzles = state.viewport_swizzles; 207 const auto& viewport_swizzles = state.viewport_swizzles;
186 208
@@ -290,8 +312,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
290 }; 312 };
291 313
292 std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; 314 std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
293 std::transform(viewport_swizzles.begin(), viewport_swizzles.end(), swizzles.begin(), 315 std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
294 UnpackViewportSwizzle);
295 VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ 316 VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
296 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, 317 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,
297 .pNext = nullptr, 318 .pNext = nullptr,
@@ -326,7 +347,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
326 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, 347 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
327 .pNext = nullptr, 348 .pNext = nullptr,
328 .flags = 0, 349 .flags = 0,
329 .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, 350 .rasterizationSamples = ConvertMsaaMode(state.msaa_mode),
330 .sampleShadingEnable = VK_FALSE, 351 .sampleShadingEnable = VK_FALSE,
331 .minSampleShading = 0.0f, 352 .minSampleShading = 0.0f,
332 .pSampleMask = nullptr, 353 .pSampleMask = nullptr,
@@ -352,8 +373,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
352 }; 373 };
353 374
354 std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; 375 std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
355 const auto num_attachments = static_cast<std::size_t>(renderpass_params.num_color_attachments); 376 for (std::size_t index = 0; index < num_color_buffers; ++index) {
356 for (std::size_t index = 0; index < num_attachments; ++index) {
357 static constexpr std::array COMPONENT_TABLE{ 377 static constexpr std::array COMPONENT_TABLE{
358 VK_COLOR_COMPONENT_R_BIT, 378 VK_COLOR_COMPONENT_R_BIT,
359 VK_COLOR_COMPONENT_G_BIT, 379 VK_COLOR_COMPONENT_G_BIT,
@@ -387,7 +407,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
387 .flags = 0, 407 .flags = 0,
388 .logicOpEnable = VK_FALSE, 408 .logicOpEnable = VK_FALSE,
389 .logicOp = VK_LOGIC_OP_COPY, 409 .logicOp = VK_LOGIC_OP_COPY,
390 .attachmentCount = static_cast<u32>(num_attachments), 410 .attachmentCount = num_color_buffers,
391 .pAttachments = cb_attachments.data(), 411 .pAttachments = cb_attachments.data(),
392 .blendConstants = {}, 412 .blendConstants = {},
393 }; 413 };
@@ -447,8 +467,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
447 stage_ci.pNext = &subgroup_size_ci; 467 stage_ci.pNext = &subgroup_size_ci;
448 } 468 }
449 } 469 }
450 470 return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{
451 const VkGraphicsPipelineCreateInfo ci{
452 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, 471 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
453 .pNext = nullptr, 472 .pNext = nullptr,
454 .flags = 0, 473 .flags = 0,
@@ -468,8 +487,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
468 .subpass = 0, 487 .subpass = 0,
469 .basePipelineHandle = nullptr, 488 .basePipelineHandle = nullptr,
470 .basePipelineIndex = 0, 489 .basePipelineIndex = 0,
471 }; 490 });
472 return device.GetLogical().CreateGraphicsPipeline(ci);
473} 491}
474 492
475} // namespace Vulkan 493} // namespace Vulkan
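ConvertMsaaMode above relies on the Tegra mode names encoding a WxH sample grid: the Vulkan sample count is the product of the two factors, and the _D3D/_VC* variants collapse to the same count here. For example:

    // Msaa2x1 -> 2 samples, Msaa2x2 -> 4, Msaa4x2 -> 8, Msaa4x4 -> 16.
    const VkSampleCountFlagBits samples = ConvertMsaaMode(Tegra::Texture::MsaaMode::Msaa4x2);
    // samples == VK_SAMPLE_COUNT_8_BIT, fed to rasterizationSamples in the pipeline above.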
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 3fb31d55a..214d06b4c 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -8,10 +8,10 @@
8#include <optional> 8#include <optional>
9#include <vector> 9#include <vector>
10 10
11#include "common/common_types.h"
11#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
12#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 13#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
13#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 14#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
14#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
15#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 15#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
16#include "video_core/renderer_vulkan/wrapper.h" 16#include "video_core/renderer_vulkan/wrapper.h"
17 17
@@ -20,8 +20,7 @@ namespace Vulkan {
20using Maxwell = Tegra::Engines::Maxwell3D::Regs; 20using Maxwell = Tegra::Engines::Maxwell3D::Regs;
21 21
22struct GraphicsPipelineCacheKey { 22struct GraphicsPipelineCacheKey {
23 RenderPassParams renderpass_params; 23 VkRenderPass renderpass;
24 u32 padding;
25 std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; 24 std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
26 FixedPipelineState fixed_state; 25 FixedPipelineState fixed_state;
27 26
@@ -34,7 +33,7 @@ struct GraphicsPipelineCacheKey {
34 } 33 }
35 34
36 std::size_t Size() const noexcept { 35 std::size_t Size() const noexcept {
37 return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size(); 36 return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size();
38 } 37 }
39}; 38};
40static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); 39static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
@@ -43,7 +42,6 @@ static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
43 42
44class VKDescriptorPool; 43class VKDescriptorPool;
45class VKDevice; 44class VKDevice;
46class VKRenderPassCache;
47class VKScheduler; 45class VKScheduler;
48class VKUpdateDescriptorQueue; 46class VKUpdateDescriptorQueue;
49 47
@@ -52,12 +50,11 @@ using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderSt
52class VKGraphicsPipeline final { 50class VKGraphicsPipeline final {
53public: 51public:
54 explicit VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_, 52 explicit VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_,
55 VKDescriptorPool& descriptor_pool_, 53 VKDescriptorPool& descriptor_pool,
56 VKUpdateDescriptorQueue& update_descriptor_queue_, 54 VKUpdateDescriptorQueue& update_descriptor_queue_,
57 VKRenderPassCache& renderpass_cache_, 55 const GraphicsPipelineCacheKey& key,
58 const GraphicsPipelineCacheKey& key_, 56 vk::Span<VkDescriptorSetLayoutBinding> bindings,
59 vk::Span<VkDescriptorSetLayoutBinding> bindings_, 57 const SPIRVProgram& program, u32 num_color_buffers);
60 const SPIRVProgram& program_);
61 ~VKGraphicsPipeline(); 58 ~VKGraphicsPipeline();
62 59
63 VkDescriptorSet CommitDescriptorSet(); 60 VkDescriptorSet CommitDescriptorSet();
@@ -70,10 +67,6 @@ public:
70 return *layout; 67 return *layout;
71 } 68 }
72 69
73 VkRenderPass GetRenderPass() const {
74 return renderpass;
75 }
76
77 GraphicsPipelineCacheKey GetCacheKey() const { 70 GraphicsPipelineCacheKey GetCacheKey() const {
78 return cache_key; 71 return cache_key;
79 } 72 }
@@ -89,8 +82,8 @@ private:
89 82
90 std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; 83 std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const;
91 84
92 vk::Pipeline CreatePipeline(const RenderPassParams& renderpass_params, 85 vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass,
93 const SPIRVProgram& program) const; 86 u32 num_color_buffers) const;
94 87
95 const VKDevice& device; 88 const VKDevice& device;
96 VKScheduler& scheduler; 89 VKScheduler& scheduler;
@@ -104,7 +97,6 @@ private:
104 vk::DescriptorUpdateTemplateKHR descriptor_template; 97 vk::DescriptorUpdateTemplateKHR descriptor_template;
105 std::vector<vk::ShaderModule> modules; 98 std::vector<vk::ShaderModule> modules;
106 99
107 VkRenderPass renderpass;
108 vk::Pipeline pipeline; 100 vk::Pipeline pipeline;
109}; 101};
110 102
diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp
deleted file mode 100644
index 072d14e3b..000000000
--- a/src/video_core/renderer_vulkan/vk_image.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <memory>
6#include <vector>
7
8#include "common/assert.h"
9#include "video_core/renderer_vulkan/vk_device.h"
10#include "video_core/renderer_vulkan/vk_image.h"
11#include "video_core/renderer_vulkan/vk_scheduler.h"
12#include "video_core/renderer_vulkan/wrapper.h"
13
14namespace Vulkan {
15
16VKImage::VKImage(const VKDevice& device_, VKScheduler& scheduler_,
17 const VkImageCreateInfo& image_ci_, VkImageAspectFlags aspect_mask_)
18 : device{device_}, scheduler{scheduler_}, format{image_ci_.format}, aspect_mask{aspect_mask_},
19 image_num_layers{image_ci_.arrayLayers}, image_num_levels{image_ci_.mipLevels} {
20 UNIMPLEMENTED_IF_MSG(image_ci_.queueFamilyIndexCount != 0,
21 "Queue family tracking is not implemented");
22
23 image = device_.GetLogical().CreateImage(image_ci_);
24
25 const u32 num_ranges = image_num_layers * image_num_levels;
26 barriers.resize(num_ranges);
27 subrange_states.resize(num_ranges, {{}, image_ci_.initialLayout});
28}
29
30VKImage::~VKImage() = default;
31
32void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
33 VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access,
34 VkImageLayout new_layout) {
35 if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) {
36 return;
37 }
38
39 std::size_t cursor = 0;
40 for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) {
41 for (u32 level_it = 0; level_it < num_levels; ++level_it, ++cursor) {
42 const u32 layer = base_layer + layer_it;
43 const u32 level = base_level + level_it;
44 auto& state = GetSubrangeState(layer, level);
45 auto& barrier = barriers[cursor];
46 barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
47 barrier.pNext = nullptr;
48 barrier.srcAccessMask = state.access;
49 barrier.dstAccessMask = new_access;
50 barrier.oldLayout = state.layout;
51 barrier.newLayout = new_layout;
52 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
53 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
54 barrier.image = *image;
55 barrier.subresourceRange.aspectMask = aspect_mask;
56 barrier.subresourceRange.baseMipLevel = level;
57 barrier.subresourceRange.levelCount = 1;
58 barrier.subresourceRange.baseArrayLayer = layer;
59 barrier.subresourceRange.layerCount = 1;
60 state.access = new_access;
61 state.layout = new_layout;
62 }
63 }
64
65 scheduler.RequestOutsideRenderPassOperationContext();
66
67 scheduler.Record([barriers = barriers, cursor](vk::CommandBuffer cmdbuf) {
68 // TODO(Rodrigo): Implement a way to use the latest stage across subresources.
69 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
70 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, {}, {},
71 vk::Span(barriers.data(), cursor));
72 });
73}
74
75bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
76 VkAccessFlags new_access, VkImageLayout new_layout) noexcept {
77 const bool is_full_range = base_layer == 0 && num_layers == image_num_layers &&
78 base_level == 0 && num_levels == image_num_levels;
79 if (!is_full_range) {
80 state_diverged = true;
81 }
82
83 if (!state_diverged) {
84 auto& state = GetSubrangeState(0, 0);
85 if (state.access != new_access || state.layout != new_layout) {
86 return true;
87 }
88 }
89
90 for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) {
91 for (u32 level_it = 0; level_it < num_levels; ++level_it) {
92 const u32 layer = base_layer + layer_it;
93 const u32 level = base_level + level_it;
94 auto& state = GetSubrangeState(layer, level);
95 if (state.access != new_access || state.layout != new_layout) {
96 return true;
97 }
98 }
99 }
100 return false;
101}
102
103void VKImage::CreatePresentView() {
104 // Image type has to be 2D to be presented.
105 present_view = device.GetLogical().CreateImageView({
106 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
107 .pNext = nullptr,
108 .flags = 0,
109 .image = *image,
110 .viewType = VK_IMAGE_VIEW_TYPE_2D,
111 .format = format,
112 .components =
113 {
114 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
115 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
116 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
117 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
118 },
119 .subresourceRange =
120 {
121 .aspectMask = aspect_mask,
122 .baseMipLevel = 0,
123 .levelCount = 1,
124 .baseArrayLayer = 0,
125 .layerCount = 1,
126 },
127 });
128}
129
130VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept {
131 return subrange_states[static_cast<std::size_t>(layer * image_num_levels) +
132 static_cast<std::size_t>(level)];
133}
134
135} // namespace Vulkan \ No newline at end of file
diff --git a/src/video_core/renderer_vulkan/vk_image.h b/src/video_core/renderer_vulkan/vk_image.h
deleted file mode 100644
index 287ab90ca..000000000
--- a/src/video_core/renderer_vulkan/vk_image.h
+++ /dev/null
@@ -1,84 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <vector>
9
10#include "common/common_types.h"
11#include "video_core/renderer_vulkan/wrapper.h"
12
13namespace Vulkan {
14
15class VKDevice;
16class VKScheduler;
17
18class VKImage {
19public:
20 explicit VKImage(const VKDevice& device_, VKScheduler& scheduler_,
21 const VkImageCreateInfo& image_ci_, VkImageAspectFlags aspect_mask_);
22 ~VKImage();
23
24 /// Records in the passed command buffer an image transition and updates the state of the image.
25 void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
26 VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access,
27 VkImageLayout new_layout);
28
 29 /// Returns a view compatible with presentation; the image has to be 2D.
30 VkImageView GetPresentView() {
31 if (!present_view) {
32 CreatePresentView();
33 }
34 return *present_view;
35 }
36
 37 /// Returns the Vulkan image handle.
38 const vk::Image& GetHandle() const {
39 return image;
40 }
41
42 /// Returns the Vulkan format for this image.
43 VkFormat GetFormat() const {
44 return format;
45 }
46
47 /// Returns the Vulkan aspect mask.
48 VkImageAspectFlags GetAspectMask() const {
49 return aspect_mask;
50 }
51
52private:
53 struct SubrangeState final {
54 VkAccessFlags access = 0; ///< Current access bits.
55 VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; ///< Current image layout.
56 };
57
58 bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
59 VkAccessFlags new_access, VkImageLayout new_layout) noexcept;
60
61 /// Creates a presentation view.
62 void CreatePresentView();
63
 64 /// Returns the subrange state for a layer and level.
65 SubrangeState& GetSubrangeState(u32 layer, u32 level) noexcept;
66
67 const VKDevice& device; ///< Device handler.
68 VKScheduler& scheduler; ///< Device scheduler.
69
70 const VkFormat format; ///< Vulkan format.
71 const VkImageAspectFlags aspect_mask; ///< Vulkan aspect mask.
72 const u32 image_num_layers; ///< Number of layers.
73 const u32 image_num_levels; ///< Number of mipmap levels.
74
75 vk::Image image; ///< Image handle.
76 vk::ImageView present_view; ///< Image view compatible with presentation.
77
78 std::vector<VkImageMemoryBarrier> barriers; ///< Pool of barriers.
79 std::vector<SubrangeState> subrange_states; ///< Current subrange state.
80
81 bool state_diverged = false; ///< True when subresources mismatch in layout.
82};
83
84} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
index be53d450f..56b24b70f 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -216,7 +216,7 @@ VKMemoryCommitImpl::~VKMemoryCommitImpl() {
216} 216}
217 217
218MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const { 218MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const {
219 return MemoryMap{this, memory.Map(interval.first + offset_, size)}; 219 return MemoryMap(this, std::span<u8>(memory.Map(interval.first + offset_, size), size));
220} 220}
221 221
222void VKMemoryCommitImpl::Unmap() const { 222void VKMemoryCommitImpl::Unmap() const {
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h
index 39f903ec8..318f8b43e 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.h
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <memory> 7#include <memory>
8#include <span>
8#include <utility> 9#include <utility>
9#include <vector> 10#include <vector>
10#include "common/common_types.h" 11#include "common/common_types.h"
@@ -93,8 +94,8 @@ private:
93/// Holds ownership of a memory map. 94/// Holds ownership of a memory map.
94class MemoryMap final { 95class MemoryMap final {
95public: 96public:
96 explicit MemoryMap(const VKMemoryCommitImpl* commit_, u8* address_) 97 explicit MemoryMap(const VKMemoryCommitImpl* commit_, std::span<u8> span_)
97 : commit{commit_}, address{address_} {} 98 : commit{commit_}, span{span_} {}
98 99
99 ~MemoryMap() { 100 ~MemoryMap() {
100 if (commit) { 101 if (commit) {
@@ -108,19 +109,24 @@ public:
108 commit = nullptr; 109 commit = nullptr;
109 } 110 }
110 111
112 /// Returns a span to the memory map.
113 [[nodiscard]] std::span<u8> Span() const noexcept {
114 return span;
115 }
116
111 /// Returns the address of the memory map. 117 /// Returns the address of the memory map.
112 u8* GetAddress() const { 118 [[nodiscard]] u8* Address() const noexcept {
113 return address; 119 return span.data();
114 } 120 }
115 121
 116 /// Returns the address of the memory map. 122 /// Returns the address of the memory map.
117 operator u8*() const { 123 [[nodiscard]] operator u8*() const noexcept {
118 return address; 124 return span.data();
119 } 125 }
120 126
121private: 127private:
122 const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit. 128 const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit.
123 u8* address{}; ///< Address to the mapped memory. 129 std::span<u8> span; ///< Span of the mapped memory.
124}; 130};
125 131
126} // namespace Vulkan 132} // namespace Vulkan
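
Switching MemoryMap from a raw u8* to std::span<u8> keeps the mapped size attached to the
pointer, so callers can bounds-check writes instead of implicitly trusting the commit size.
A hedged usage sketch (the helper is illustrative, not part of the commit):

    #include <algorithm>
    #include <cstdint>
    #include <cstring>
    #include <span>

    using u8 = std::uint8_t;

    // Copy into a mapped commit without overrunning it.
    void UploadToMap(std::span<u8> map, std::span<const u8> data) {
        const std::size_t copy_size = std::min(map.size(), data.size());
        std::memcpy(map.data(), data.data(), copy_size);
    }

    // With the new accessor: UploadToMap(memory_map.Span(), staging_data);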
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 3fb264d03..083796d05 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -8,6 +8,7 @@
8#include <vector> 8#include <vector>
9 9
10#include "common/bit_cast.h" 10#include "common/bit_cast.h"
11#include "common/cityhash.h"
11#include "common/microprofile.h" 12#include "common/microprofile.h"
12#include "core/core.h" 13#include "core/core.h"
13#include "core/memory.h" 14#include "core/memory.h"
@@ -22,7 +23,6 @@
22#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 23#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
23#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 24#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
24#include "video_core/renderer_vulkan/vk_rasterizer.h" 25#include "video_core/renderer_vulkan/vk_rasterizer.h"
25#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
26#include "video_core/renderer_vulkan/vk_scheduler.h" 26#include "video_core/renderer_vulkan/vk_scheduler.h"
27#include "video_core/renderer_vulkan/vk_update_descriptor.h" 27#include "video_core/renderer_vulkan/vk_update_descriptor.h"
28#include "video_core/renderer_vulkan/wrapper.h" 28#include "video_core/renderer_vulkan/wrapper.h"
@@ -52,7 +52,9 @@ constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEX
52constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; 52constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
53 53
54constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ 54constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
55 VideoCommon::Shader::CompileDepth::FullDecompile}; 55 .depth = VideoCommon::Shader::CompileDepth::FullDecompile,
56 .disable_else_derivation = true,
57};
56 58
57constexpr std::size_t GetStageFromProgram(std::size_t program) { 59constexpr std::size_t GetStageFromProgram(std::size_t program) {
58 return program == 0 ? 0 : program - 1; 60 return program == 0 ? 0 : program - 1;
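
The compiler settings now use C++20 designated initializers, which makes the newly enabled
disable_else_derivation flag explicit while unnamed members keep their defaults. A small
sketch of the idiom; only the two field names come from the diff, the struct body and enum
are assumptions:

    enum class CompileDepth { NoFlowStack, FullDecompile };

    struct CompilerSettings {
        CompileDepth depth = CompileDepth::NoFlowStack;
        bool disable_else_derivation = false;
    };

    constexpr CompilerSettings compiler_settings{
        .depth = CompileDepth::FullDecompile,
        .disable_else_derivation = true,
    };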
@@ -149,12 +151,11 @@ VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_
149 Tegra::Engines::KeplerCompute& kepler_compute_, 151 Tegra::Engines::KeplerCompute& kepler_compute_,
150 Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, 152 Tegra::MemoryManager& gpu_memory_, const VKDevice& device_,
151 VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, 153 VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
152 VKUpdateDescriptorQueue& update_descriptor_queue_, 154 VKUpdateDescriptorQueue& update_descriptor_queue_)
153 VKRenderPassCache& renderpass_cache_) 155 : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
154 : ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, 156 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
155 gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, 157 scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{
156 descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, 158 update_descriptor_queue_} {}
157 renderpass_cache{renderpass_cache_} {}
158 159
159VKPipelineCache::~VKPipelineCache() = default; 160VKPipelineCache::~VKPipelineCache() = default;
160 161
@@ -199,7 +200,8 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
199} 200}
200 201
201VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( 202VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
202 const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) { 203 const GraphicsPipelineCacheKey& key, u32 num_color_buffers,
204 VideoCommon::Shader::AsyncShaders& async_shaders) {
203 MICROPROFILE_SCOPE(Vulkan_PipelineCache); 205 MICROPROFILE_SCOPE(Vulkan_PipelineCache);
204 206
205 if (last_graphics_pipeline && last_graphics_key == key) { 207 if (last_graphics_pipeline && last_graphics_key == key) {
@@ -215,8 +217,8 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
215 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); 217 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
216 const auto [program, bindings] = DecompileShaders(key.fixed_state); 218 const auto [program, bindings] = DecompileShaders(key.fixed_state);
217 async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, 219 async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
218 update_descriptor_queue, renderpass_cache, bindings, 220 update_descriptor_queue, bindings, program, key,
219 program, key); 221 num_color_buffers);
220 } 222 }
221 last_graphics_pipeline = pair->second.get(); 223 last_graphics_pipeline = pair->second.get();
222 return last_graphics_pipeline; 224 return last_graphics_pipeline;
@@ -229,8 +231,8 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
229 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); 231 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
230 const auto [program, bindings] = DecompileShaders(key.fixed_state); 232 const auto [program, bindings] = DecompileShaders(key.fixed_state);
231 entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, 233 entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
232 update_descriptor_queue, renderpass_cache, key, 234 update_descriptor_queue, key, bindings,
233 bindings, program); 235 program, num_color_buffers);
234 gpu.ShaderNotify().MarkShaderComplete(); 236 gpu.ShaderNotify().MarkShaderComplete();
235 } 237 }
236 last_graphics_pipeline = entry.get(); 238 last_graphics_pipeline = entry.get();
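
With the render pass cache removed, the render pass is owned by the framebuffer and the
pipeline cache only needs the number of color buffers the pass writes. A sketch of the new
call shape; the template parameters are stand-ins so the fragment is self-contained:

    template <typename PipelineCache, typename Framebuffer, typename Key,
              typename AsyncShaders>
    auto* GetPipelineForFramebuffer(PipelineCache& cache, const Framebuffer& framebuffer,
                                    Key& key, AsyncShaders& async_shaders) {
        key.renderpass = framebuffer.RenderPass();
        return cache.GetGraphicsPipeline(key, framebuffer.NumColorBuffers(), async_shaders);
    }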
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 9e1f8fcbb..fbaa8257c 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -19,7 +19,6 @@
19#include "video_core/engines/maxwell_3d.h" 19#include "video_core/engines/maxwell_3d.h"
20#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 20#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
21#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 21#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
22#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
23#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 22#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
24#include "video_core/renderer_vulkan/wrapper.h" 23#include "video_core/renderer_vulkan/wrapper.h"
25#include "video_core/shader/async_shaders.h" 24#include "video_core/shader/async_shaders.h"
@@ -119,18 +118,18 @@ private:
119 118
120class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { 119class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
121public: 120public:
122 explicit VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, 121 explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
123 Tegra::Engines::Maxwell3D& maxwell3d_, 122 Tegra::Engines::Maxwell3D& maxwell3d,
124 Tegra::Engines::KeplerCompute& kepler_compute_, 123 Tegra::Engines::KeplerCompute& kepler_compute,
125 Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, 124 Tegra::MemoryManager& gpu_memory, const VKDevice& device,
126 VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, 125 VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
127 VKUpdateDescriptorQueue& update_descriptor_queue_, 126 VKUpdateDescriptorQueue& update_descriptor_queue);
128 VKRenderPassCache& renderpass_cache_);
129 ~VKPipelineCache() override; 127 ~VKPipelineCache() override;
130 128
131 std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); 129 std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
132 130
133 VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, 131 VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
132 u32 num_color_buffers,
134 VideoCommon::Shader::AsyncShaders& async_shaders); 133 VideoCommon::Shader::AsyncShaders& async_shaders);
135 134
136 VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); 135 VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
@@ -153,7 +152,6 @@ private:
153 VKScheduler& scheduler; 152 VKScheduler& scheduler;
154 VKDescriptorPool& descriptor_pool; 153 VKDescriptorPool& descriptor_pool;
155 VKUpdateDescriptorQueue& update_descriptor_queue; 154 VKUpdateDescriptorQueue& update_descriptor_queue;
156 VKRenderPassCache& renderpass_cache;
157 155
158 std::unique_ptr<Shader> null_shader; 156 std::unique_ptr<Shader> null_shader;
159 std::unique_ptr<Shader> null_kernel; 157 std::unique_ptr<Shader> null_kernel;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f93986aab..04c5c859c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -19,6 +19,7 @@
19#include "core/settings.h" 19#include "core/settings.h"
20#include "video_core/engines/kepler_compute.h" 20#include "video_core/engines/kepler_compute.h"
21#include "video_core/engines/maxwell_3d.h" 21#include "video_core/engines/maxwell_3d.h"
22#include "video_core/renderer_vulkan/blit_image.h"
22#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 23#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
23#include "video_core/renderer_vulkan/maxwell_to_vk.h" 24#include "video_core/renderer_vulkan/maxwell_to_vk.h"
24#include "video_core/renderer_vulkan/renderer_vulkan.h" 25#include "video_core/renderer_vulkan/renderer_vulkan.h"
@@ -30,8 +31,6 @@
30#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 31#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
31#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 32#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
32#include "video_core/renderer_vulkan/vk_rasterizer.h" 33#include "video_core/renderer_vulkan/vk_rasterizer.h"
33#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
34#include "video_core/renderer_vulkan/vk_sampler_cache.h"
35#include "video_core/renderer_vulkan/vk_scheduler.h" 34#include "video_core/renderer_vulkan/vk_scheduler.h"
36#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 35#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
37#include "video_core/renderer_vulkan/vk_state_tracker.h" 36#include "video_core/renderer_vulkan/vk_state_tracker.h"
@@ -39,10 +38,13 @@
39#include "video_core/renderer_vulkan/vk_update_descriptor.h" 38#include "video_core/renderer_vulkan/vk_update_descriptor.h"
40#include "video_core/renderer_vulkan/wrapper.h" 39#include "video_core/renderer_vulkan/wrapper.h"
41#include "video_core/shader_cache.h" 40#include "video_core/shader_cache.h"
41#include "video_core/texture_cache/texture_cache.h"
42 42
43namespace Vulkan { 43namespace Vulkan {
44 44
45using Maxwell = Tegra::Engines::Maxwell3D::Regs; 45using Maxwell = Tegra::Engines::Maxwell3D::Regs;
46using VideoCommon::ImageViewId;
47using VideoCommon::ImageViewType;
46 48
47MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192)); 49MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192));
48MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); 50MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128));
@@ -58,9 +60,9 @@ MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192
58 60
59namespace { 61namespace {
60 62
61constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::ShaderType::Compute); 63constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute);
62 64
63VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) { 65VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, size_t index) {
64 const auto& src = regs.viewport_transform[index]; 66 const auto& src = regs.viewport_transform[index];
65 const float width = src.scale_x * 2.0f; 67 const float width = src.scale_x * 2.0f;
66 const float height = src.scale_y * 2.0f; 68 const float height = src.scale_y * 2.0f;
@@ -83,7 +85,7 @@ VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::si
83 return viewport; 85 return viewport;
84} 86}
85 87
86VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) { 88VkRect2D GetScissorState(const Maxwell& regs, size_t index) {
87 const auto& src = regs.scissor_test[index]; 89 const auto& src = regs.scissor_test[index];
88 VkRect2D scissor; 90 VkRect2D scissor;
89 if (src.enable) { 91 if (src.enable) {
@@ -103,98 +105,122 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) {
103std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( 105std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
104 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { 106 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
105 std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; 107 std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
106 for (std::size_t i = 0; i < std::size(addresses); ++i) { 108 for (size_t i = 0; i < std::size(addresses); ++i) {
107 addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; 109 addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
108 } 110 }
109 return addresses; 111 return addresses;
110} 112}
111 113
112void TransitionImages(const std::vector<ImageView>& views, VkPipelineStageFlags pipeline_stage, 114struct TextureHandle {
113 VkAccessFlags access) { 115 constexpr TextureHandle(u32 data, bool via_header_index) {
114 for (auto& [view, layout] : views) { 116 const Tegra::Texture::TextureHandle handle{data};
115 view->Transition(*layout, pipeline_stage, access); 117 image = handle.tic_id;
118 sampler = via_header_index ? image : handle.tsc_id.Value();
116 } 119 }
117} 120
121 u32 image;
122 u32 sampler;
123};
118 124
119template <typename Engine, typename Entry> 125template <typename Engine, typename Entry>
120Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, 126TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
121 std::size_t stage, std::size_t index = 0) { 127 size_t stage, size_t index = 0) {
122 const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); 128 const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage);
123 if constexpr (std::is_same_v<Entry, SamplerEntry>) { 129 if constexpr (std::is_same_v<Entry, SamplerEntry>) {
124 if (entry.is_separated) { 130 if (entry.is_separated) {
125 const u32 buffer_1 = entry.buffer; 131 const u32 buffer_1 = entry.buffer;
126 const u32 buffer_2 = entry.secondary_buffer; 132 const u32 buffer_2 = entry.secondary_buffer;
127 const u32 offset_1 = entry.offset; 133 const u32 offset_1 = entry.offset;
128 const u32 offset_2 = entry.secondary_offset; 134 const u32 offset_2 = entry.secondary_offset;
129 const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1); 135 const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
130 const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2); 136 const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
131 return engine.GetTextureInfo(Tegra::Texture::TextureHandle{handle_1 | handle_2}); 137 return TextureHandle(handle_1 | handle_2, via_header_index);
132 } 138 }
133 } 139 }
134 if (entry.is_bindless) { 140 if (entry.is_bindless) {
135 const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset); 141 const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
136 return engine.GetTextureInfo(Tegra::Texture::TextureHandle{tex_handle}); 142 return TextureHandle(raw, via_header_index);
137 }
138 const auto& gpu_profile = engine.AccessGuestDriverProfile();
139 const u32 entry_offset = static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
140 const u32 offset = entry.offset + entry_offset;
141 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
142 return engine.GetStageTexture(stage_type, offset);
143 } else {
144 return engine.GetTexture(offset);
145 }
146}
147
148/// @brief Determine if an attachment to be updated has to preserve contents
149/// @param is_clear True when a clear is being executed
150/// @param regs 3D registers
151/// @return True when the contents have to be preserved
152bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) {
153 if (!is_clear) {
154 return true;
155 }
156 // First we have to make sure all clear masks are enabled.
157 if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B ||
158 !regs.clear_buffers.A) {
159 return true;
160 }
161 // If scissors are disabled, the whole screen is cleared
162 if (!regs.clear_flags.scissor) {
163 return false;
164 } 143 }
165 // Then we have to confirm scissor testing clears the whole image 144 const u32 buffer = engine.GetBoundBuffer();
166 const std::size_t index = regs.clear_buffers.RT; 145 const u64 offset = (entry.offset + index) * sizeof(u32);
167 const auto& scissor = regs.scissor_test[0]; 146 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
168 return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width ||
169 scissor.max_y < regs.rt[index].height;
170} 147}
171 148
172/// @brief Determine if an attachment to be updated has to preserve contents 149template <size_t N>
173/// @param is_clear True when a clear is being executed
174/// @param regs 3D registers
175/// @return True when the contents have to be preserved
176bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) {
177 // If we are not clearing, the contents have to be preserved
178 if (!is_clear) {
179 return true;
180 }
181 // For depth stencil clears we only have to confirm scissor test covers the whole image
182 if (!regs.clear_flags.scissor) {
183 return false;
184 }
185 // Make sure the clear covers the whole image
186 const auto& scissor = regs.scissor_test[0];
187 return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width ||
188 scissor.max_y < regs.zeta_height;
189}
190
191template <std::size_t N>
192std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) { 150std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) {
193 std::array<VkDeviceSize, N> expanded; 151 std::array<VkDeviceSize, N> expanded;
194 std::copy(strides.begin(), strides.end(), expanded.begin()); 152 std::copy(strides.begin(), strides.end(), expanded.begin());
195 return expanded; 153 return expanded;
196} 154}
197 155
156ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
157 if (entry.is_buffer) {
158 return ImageViewType::e2D;
159 }
160 switch (entry.type) {
161 case Tegra::Shader::TextureType::Texture1D:
162 return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
163 case Tegra::Shader::TextureType::Texture2D:
164 return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
165 case Tegra::Shader::TextureType::Texture3D:
166 return ImageViewType::e3D;
167 case Tegra::Shader::TextureType::TextureCube:
168 return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
169 }
170 UNREACHABLE();
171 return ImageViewType::e2D;
172}
173
174ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
175 switch (entry.type) {
176 case Tegra::Shader::ImageType::Texture1D:
177 return ImageViewType::e1D;
178 case Tegra::Shader::ImageType::Texture1DArray:
179 return ImageViewType::e1DArray;
180 case Tegra::Shader::ImageType::Texture2D:
181 return ImageViewType::e2D;
182 case Tegra::Shader::ImageType::Texture2DArray:
183 return ImageViewType::e2DArray;
184 case Tegra::Shader::ImageType::Texture3D:
185 return ImageViewType::e3D;
186 case Tegra::Shader::ImageType::TextureBuffer:
187 return ImageViewType::Buffer;
188 }
189 UNREACHABLE();
190 return ImageViewType::e2D;
191}
192
193void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache,
194 VKUpdateDescriptorQueue& update_descriptor_queue,
195 ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) {
196 for ([[maybe_unused]] const auto& entry : entries.uniform_texels) {
197 const ImageViewId image_view_id = *image_view_id_ptr++;
198 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
199 update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
200 }
201 for (const auto& entry : entries.samplers) {
202 for (size_t i = 0; i < entry.size; ++i) {
203 const VkSampler sampler = *sampler_ptr++;
204 const ImageViewId image_view_id = *image_view_id_ptr++;
205 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
206 const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry));
207 update_descriptor_queue.AddSampledImage(handle, sampler);
208 }
209 }
210 for ([[maybe_unused]] const auto& entry : entries.storage_texels) {
211 const ImageViewId image_view_id = *image_view_id_ptr++;
212 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
213 update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
214 }
215 for (const auto& entry : entries.images) {
216 // TODO: Mark as modified
217 const ImageViewId image_view_id = *image_view_id_ptr++;
218 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
219 const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry));
220 update_descriptor_queue.AddImage(handle);
221 }
222}
223
198} // Anonymous namespace 224} // Anonymous namespace
199 225
200class BufferBindings final { 226class BufferBindings final {
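
TextureHandle above splits one 32-bit handle into a texture header (TIC) index and a
sampler (TSC) index; with ViaHeaderIndex the sampler index is taken from the TIC index
instead. A hedged sketch of the decoding, assuming the usual Maxwell packing of 20 TIC bits
in the low word and 12 TSC bits above them:

    #include <cstdint>

    struct TextureHandleSketch {
        std::uint32_t image;   // TIC index
        std::uint32_t sampler; // TSC index

        constexpr TextureHandleSketch(std::uint32_t data, bool via_header_index)
            : image{data & 0xFFFFFu},
              sampler{via_header_index ? image : ((data >> 20) & 0xFFFu)} {}
    };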
@@ -290,7 +316,7 @@ public:
290private: 316private:
291 // Some of these fields are intentionally left uninitialized to avoid initializing them twice. 317 // Some of these fields are intentionally left uninitialized to avoid initializing them twice.
292 struct { 318 struct {
293 std::size_t num_buffers = 0; 319 size_t num_buffers = 0;
294 std::array<VkBuffer, Maxwell::NumVertexArrays> buffers; 320 std::array<VkBuffer, Maxwell::NumVertexArrays> buffers;
295 std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets; 321 std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets;
296 std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes; 322 std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes;
@@ -303,7 +329,7 @@ private:
303 VkIndexType type; 329 VkIndexType type;
304 } index; 330 } index;
305 331
306 template <std::size_t N> 332 template <size_t N>
307 void BindStatic(const VKDevice& device, VKScheduler& scheduler) const { 333 void BindStatic(const VKDevice& device, VKScheduler& scheduler) const {
308 if (device.IsExtExtendedDynamicStateSupported()) { 334 if (device.IsExtExtendedDynamicStateSupported()) {
309 if (index.buffer) { 335 if (index.buffer) {
@@ -320,7 +346,7 @@ private:
320 } 346 }
321 } 347 }
322 348
323 template <std::size_t N, bool is_indexed, bool has_extended_dynamic_state> 349 template <size_t N, bool is_indexed, bool has_extended_dynamic_state>
324 void BindStatic(VKScheduler& scheduler) const { 350 void BindStatic(VKScheduler& scheduler) const {
325 static_assert(N <= Maxwell::NumVertexArrays); 351 static_assert(N <= Maxwell::NumVertexArrays);
326 if constexpr (N == 0) { 352 if constexpr (N == 0) {
@@ -385,20 +411,23 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
385 Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_, 411 Core::Memory::Memory& cpu_memory_, VKScreenInfo& screen_info_,
386 const VKDevice& device_, VKMemoryManager& memory_manager_, 412 const VKDevice& device_, VKMemoryManager& memory_manager_,
387 StateTracker& state_tracker_, VKScheduler& scheduler_) 413 StateTracker& state_tracker_, VKScheduler& scheduler_)
388 : RasterizerAccelerated(cpu_memory_), gpu(gpu_), gpu_memory(gpu_memory_), 414 : RasterizerAccelerated{cpu_memory_}, gpu{gpu_},
389 maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), screen_info(screen_info_), 415 gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()},
390 device(device_), memory_manager(memory_manager_), state_tracker(state_tracker_), 416 screen_info{screen_info_}, device{device_}, memory_manager{memory_manager_},
391 scheduler(scheduler_), staging_pool(device, memory_manager, scheduler), 417 state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler),
392 descriptor_pool(device, scheduler_), update_descriptor_queue(device, scheduler), 418 staging_pool(device, memory_manager, scheduler), descriptor_pool(device, scheduler),
393 renderpass_cache(device), 419 update_descriptor_queue(device, scheduler),
420 blit_image(device, scheduler, state_tracker, descriptor_pool),
394 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), 421 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
395 quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), 422 quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
396 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), 423 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
397 texture_cache(*this, maxwell3d, gpu_memory, device, memory_manager, scheduler, staging_pool), 424 texture_cache_runtime{device, scheduler, memory_manager, staging_pool, blit_image},
425 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
398 pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, 426 pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
399 descriptor_pool, update_descriptor_queue, renderpass_cache), 427 descriptor_pool, update_descriptor_queue),
400 buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_manager, scheduler, staging_pool), 428 buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_manager, scheduler, stream_buffer,
401 sampler_cache(device), query_cache(*this, maxwell3d, gpu_memory, device, scheduler), 429 staging_pool),
430 query_cache{*this, maxwell3d, gpu_memory, device, scheduler},
402 fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device, 431 fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device,
403 scheduler), 432 scheduler),
404 wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { 433 wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) {
@@ -427,9 +456,10 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
427 const DrawParameters draw_params = 456 const DrawParameters draw_params =
428 SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced); 457 SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced);
429 458
430 update_descriptor_queue.Acquire(); 459 auto lock = texture_cache.AcquireLock();
431 sampled_views.clear(); 460 texture_cache.SynchronizeGraphicsDescriptors();
432 image_views.clear(); 461
462 texture_cache.UpdateRenderTargets(false);
433 463
434 const auto shaders = pipeline_cache.GetShaders(); 464 const auto shaders = pipeline_cache.GetShaders();
435 key.shaders = GetShaderAddresses(shaders); 465 key.shaders = GetShaderAddresses(shaders);
@@ -437,30 +467,24 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
437 467
438 buffer_cache.Unmap(); 468 buffer_cache.Unmap();
439 469
440 const Texceptions texceptions = UpdateAttachments(false); 470 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
441 SetupImageTransitions(texceptions, color_attachments, zeta_attachment); 471 key.renderpass = framebuffer->RenderPass();
442
443 key.renderpass_params = GetRenderPassParams(texceptions);
444 key.padding = 0;
445 472
446 auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders); 473 auto* const pipeline =
474 pipeline_cache.GetGraphicsPipeline(key, framebuffer->NumColorBuffers(), async_shaders);
447 if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { 475 if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
448 // Async graphics pipeline was not ready. 476 // Async graphics pipeline was not ready.
449 return; 477 return;
450 } 478 }
451 479
452 scheduler.BindGraphicsPipeline(pipeline->GetHandle());
453
454 const auto renderpass = pipeline->GetRenderPass();
455 const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
456 scheduler.RequestRenderpass(renderpass, framebuffer, render_area);
457
458 UpdateDynamicStates();
459
460 buffer_bindings.Bind(device, scheduler); 480 buffer_bindings.Bind(device, scheduler);
461 481
462 BeginTransformFeedback(); 482 BeginTransformFeedback();
463 483
484 scheduler.RequestRenderpass(framebuffer);
485 scheduler.BindGraphicsPipeline(pipeline->GetHandle());
486 UpdateDynamicStates();
487
464 const auto pipeline_layout = pipeline->GetLayout(); 488 const auto pipeline_layout = pipeline->GetLayout();
465 const auto descriptor_set = pipeline->CommitDescriptorSet(); 489 const auto descriptor_set = pipeline->CommitDescriptorSet();
466 scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { 490 scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
@@ -481,9 +505,6 @@ void RasterizerVulkan::Clear() {
481 return; 505 return;
482 } 506 }
483 507
484 sampled_views.clear();
485 image_views.clear();
486
487 query_cache.UpdateCounters(); 508 query_cache.UpdateCounters();
488 509
489 const auto& regs = maxwell3d.regs; 510 const auto& regs = maxwell3d.regs;
@@ -495,20 +516,24 @@ void RasterizerVulkan::Clear() {
495 return; 516 return;
496 } 517 }
497 518
498 [[maybe_unused]] const auto texceptions = UpdateAttachments(true); 519 auto lock = texture_cache.AcquireLock();
499 DEBUG_ASSERT(texceptions.none()); 520 texture_cache.UpdateRenderTargets(true);
500 SetupImageTransitions(0, color_attachments, zeta_attachment); 521 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
522 const VkExtent2D render_area = framebuffer->RenderArea();
523 scheduler.RequestRenderpass(framebuffer);
501 524
502 const VkRenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0)); 525 VkClearRect clear_rect{
503 const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); 526 .rect = GetScissorState(regs, 0),
504 scheduler.RequestRenderpass(renderpass, framebuffer, render_area); 527 .baseArrayLayer = regs.clear_buffers.layer,
505 528 .layerCount = 1,
506 VkClearRect clear_rect; 529 };
507 clear_rect.baseArrayLayer = regs.clear_buffers.layer; 530 if (clear_rect.rect.extent.width == 0 || clear_rect.rect.extent.height == 0) {
508 clear_rect.layerCount = 1; 531 return;
509 clear_rect.rect = GetScissorState(regs, 0); 532 }
510 clear_rect.rect.extent.width = std::min(clear_rect.rect.extent.width, render_area.width); 533 clear_rect.rect.extent = VkExtent2D{
511 clear_rect.rect.extent.height = std::min(clear_rect.rect.extent.height, render_area.height); 534 .width = std::min(clear_rect.rect.extent.width, render_area.width),
535 .height = std::min(clear_rect.rect.extent.height, render_area.height),
536 };
512 537
513 if (use_color) { 538 if (use_color) {
514 VkClearValue clear_value; 539 VkClearValue clear_value;
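
The clear path now builds VkClearRect with designated initializers, rejects empty scissors
early, and clamps the rect to the framebuffer's render area. The same clamp in isolation,
as a sketch of the logic extracted into a free function:

    #include <algorithm>
    #include <vulkan/vulkan.h>

    // Returns false when there is nothing to clear.
    bool ClampClearRect(VkClearRect& clear_rect, VkExtent2D render_area) {
        if (clear_rect.rect.extent.width == 0 || clear_rect.rect.extent.height == 0) {
            return false;
        }
        clear_rect.rect.extent.width =
            std::min(clear_rect.rect.extent.width, render_area.width);
        clear_rect.rect.extent.height =
            std::min(clear_rect.rect.extent.height, render_area.height);
        return true;
    }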
@@ -549,9 +574,6 @@ void RasterizerVulkan::Clear() {
549 574
550void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { 575void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
551 MICROPROFILE_SCOPE(Vulkan_Compute); 576 MICROPROFILE_SCOPE(Vulkan_Compute);
552 update_descriptor_queue.Acquire();
553 sampled_views.clear();
554 image_views.clear();
555 577
556 query_cache.UpdateCounters(); 578 query_cache.UpdateCounters();
557 579
@@ -570,29 +592,43 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
570 // Compute dispatches can't be executed inside a renderpass 592 // Compute dispatches can't be executed inside a renderpass
571 scheduler.RequestOutsideRenderPassOperationContext(); 593 scheduler.RequestOutsideRenderPassOperationContext();
572 594
573 buffer_cache.Map(CalculateComputeStreamBufferSize()); 595 image_view_indices.clear();
596 sampler_handles.clear();
597
598 auto lock = texture_cache.AcquireLock();
599 texture_cache.SynchronizeComputeDescriptors();
574 600
575 const auto& entries = pipeline.GetEntries(); 601 const auto& entries = pipeline.GetEntries();
576 SetupComputeConstBuffers(entries);
577 SetupComputeGlobalBuffers(entries);
578 SetupComputeUniformTexels(entries); 602 SetupComputeUniformTexels(entries);
579 SetupComputeTextures(entries); 603 SetupComputeTextures(entries);
580 SetupComputeStorageTexels(entries); 604 SetupComputeStorageTexels(entries);
581 SetupComputeImages(entries); 605 SetupComputeImages(entries);
582 606
583 buffer_cache.Unmap(); 607 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
608 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
584 609
585 TransitionImages(sampled_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 610 buffer_cache.Map(CalculateComputeStreamBufferSize());
586 VK_ACCESS_SHADER_READ_BIT);
587 TransitionImages(image_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
588 VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
589 611
612 update_descriptor_queue.Acquire();
613
614 SetupComputeConstBuffers(entries);
615 SetupComputeGlobalBuffers(entries);
616
617 ImageViewId* image_view_id_ptr = image_view_ids.data();
618 VkSampler* sampler_ptr = sampler_handles.data();
619 PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
620 sampler_ptr);
621
622 buffer_cache.Unmap();
623
624 const VkPipeline pipeline_handle = pipeline.GetHandle();
625 const VkPipelineLayout pipeline_layout = pipeline.GetLayout();
626 const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet();
590 scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, 627 scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y,
591 grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(), 628 grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout,
592 layout = pipeline.GetLayout(), 629 descriptor_set](vk::CommandBuffer cmdbuf) {
593 descriptor_set = pipeline.CommitDescriptorSet()](vk::CommandBuffer cmdbuf) {
594 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); 630 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle);
595 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, DESCRIPTOR_SET, 631 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, DESCRIPTOR_SET,
596 descriptor_set, {}); 632 descriptor_set, {});
597 cmdbuf.Dispatch(grid_x, grid_y, grid_z); 633 cmdbuf.Dispatch(grid_x, grid_y, grid_z);
598 }); 634 });
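
The compute path now resolves texture handles in two phases: the Setup* calls only collect
TIC indices and sampler handles, the texture cache resolves them to image view ids in one
batch, and the descriptor push walks both arrays with raw pointers. A condensed sketch of
that gather-then-fill shape; ImageViewId is a stand-in alias here, and the push callback
represents PushImageDescriptors:

    #include <cstdint>
    #include <span>
    #include <vector>

    using ImageViewId = std::uint32_t; // stand-in for VideoCommon::ImageViewId

    template <typename TextureCache, typename PushFn>
    void FillAndPush(TextureCache& texture_cache, const std::vector<std::uint32_t>& indices,
                     std::vector<ImageViewId>& ids, PushFn&& push) {
        ids.resize(indices.size());
        texture_cache.FillComputeImageViews(std::span(indices.data(), indices.size()), ids);
        ImageViewId* id_ptr = ids.data();
        push(id_ptr); // each pushed descriptor consumes and advances one id
    }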
@@ -613,7 +649,10 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
613 if (addr == 0 || size == 0) { 649 if (addr == 0 || size == 0) {
614 return; 650 return;
615 } 651 }
616 texture_cache.FlushRegion(addr, size); 652 {
653 auto lock = texture_cache.AcquireLock();
654 texture_cache.DownloadMemory(addr, size);
655 }
617 buffer_cache.FlushRegion(addr, size); 656 buffer_cache.FlushRegion(addr, size);
618 query_cache.FlushRegion(addr, size); 657 query_cache.FlushRegion(addr, size);
619} 658}
@@ -622,14 +661,18 @@ bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
622 if (!Settings::IsGPULevelHigh()) { 661 if (!Settings::IsGPULevelHigh()) {
623 return buffer_cache.MustFlushRegion(addr, size); 662 return buffer_cache.MustFlushRegion(addr, size);
624 } 663 }
625 return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); 664 return texture_cache.IsRegionGpuModified(addr, size) ||
665 buffer_cache.MustFlushRegion(addr, size);
626} 666}
627 667
628void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { 668void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
629 if (addr == 0 || size == 0) { 669 if (addr == 0 || size == 0) {
630 return; 670 return;
631 } 671 }
632 texture_cache.InvalidateRegion(addr, size); 672 {
673 auto lock = texture_cache.AcquireLock();
674 texture_cache.WriteMemory(addr, size);
675 }
633 pipeline_cache.InvalidateRegion(addr, size); 676 pipeline_cache.InvalidateRegion(addr, size);
634 buffer_cache.InvalidateRegion(addr, size); 677 buffer_cache.InvalidateRegion(addr, size);
635 query_cache.InvalidateRegion(addr, size); 678 query_cache.InvalidateRegion(addr, size);
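
FlushRegion and InvalidateRegion now follow the same pattern: the texture cache is
externally synchronized, so the scoped lock covers only the texture operation
(DownloadMemory or WriteMemory) and the other caches run unlocked. A minimal sketch of the
scoping:

    #include <cstdint>

    template <typename TextureCache, typename BufferCache>
    void InvalidateSketch(TextureCache& texture_cache, BufferCache& buffer_cache,
                          std::uint64_t addr, std::uint64_t size) {
        {
            auto lock = texture_cache.AcquireLock(); // released at the closing brace
            texture_cache.WriteMemory(addr, size);   // mark the range as CPU-written
        }
        buffer_cache.InvalidateRegion(addr, size);   // no texture lock held here
    }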
@@ -639,17 +682,28 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
639 if (addr == 0 || size == 0) { 682 if (addr == 0 || size == 0) {
640 return; 683 return;
641 } 684 }
642 texture_cache.OnCPUWrite(addr, size); 685 {
686 auto lock = texture_cache.AcquireLock();
687 texture_cache.WriteMemory(addr, size);
688 }
643 pipeline_cache.OnCPUWrite(addr, size); 689 pipeline_cache.OnCPUWrite(addr, size);
644 buffer_cache.OnCPUWrite(addr, size); 690 buffer_cache.OnCPUWrite(addr, size);
645} 691}
646 692
647void RasterizerVulkan::SyncGuestHost() { 693void RasterizerVulkan::SyncGuestHost() {
648 texture_cache.SyncGuestHost();
649 buffer_cache.SyncGuestHost(); 694 buffer_cache.SyncGuestHost();
650 pipeline_cache.SyncGuestHost(); 695 pipeline_cache.SyncGuestHost();
651} 696}
652 697
698void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
699 {
700 auto lock = texture_cache.AcquireLock();
701 texture_cache.UnmapMemory(addr, size);
702 }
703 buffer_cache.OnCPUWrite(addr, size);
704 pipeline_cache.OnCPUWrite(addr, size);
705}
706
653void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { 707void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
654 if (!gpu.IsAsync()) { 708 if (!gpu.IsAsync()) {
655 gpu_memory.Write<u32>(addr, value); 709 gpu_memory.Write<u32>(addr, value);
@@ -700,6 +754,14 @@ void RasterizerVulkan::WaitForIdle() {
700 }); 754 });
701} 755}
702 756
757void RasterizerVulkan::FragmentBarrier() {
758 // We already put barriers when a render pass finishes
759}
760
761void RasterizerVulkan::TiledCacheBarrier() {
762 // TODO: Implementing tiled barriers requires rewriting a good chunk of the Vulkan backend
763}
764
703void RasterizerVulkan::FlushCommands() { 765void RasterizerVulkan::FlushCommands() {
704 if (draw_counter > 0) { 766 if (draw_counter > 0) {
705 draw_counter = 0; 767 draw_counter = 0;
@@ -710,14 +772,20 @@ void RasterizerVulkan::FlushCommands() {
710void RasterizerVulkan::TickFrame() { 772void RasterizerVulkan::TickFrame() {
711 draw_counter = 0; 773 draw_counter = 0;
712 update_descriptor_queue.TickFrame(); 774 update_descriptor_queue.TickFrame();
775 fence_manager.TickFrame();
713 buffer_cache.TickFrame(); 776 buffer_cache.TickFrame();
714 staging_pool.TickFrame(); 777 staging_pool.TickFrame();
778 {
779 auto lock = texture_cache.AcquireLock();
780 texture_cache.TickFrame();
781 }
715} 782}
716 783
717bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 784bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
718 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 785 const Tegra::Engines::Fermi2D::Surface& dst,
719 const Tegra::Engines::Fermi2D::Config& copy_config) { 786 const Tegra::Engines::Fermi2D::Config& copy_config) {
720 texture_cache.DoFermiCopy(src, dst, copy_config); 787 auto lock = texture_cache.AcquireLock();
788 texture_cache.BlitImage(dst, src, copy_config);
721 return true; 789 return true;
722} 790}
723 791
@@ -727,20 +795,16 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
727 return false; 795 return false;
728 } 796 }
729 797
730 const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; 798 auto lock = texture_cache.AcquireLock();
731 if (!surface) { 799 ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr);
800 if (!image_view) {
732 return false; 801 return false;
733 } 802 }
734 803
735 // Verify that the cached surface is the same size and format as the requested framebuffer 804 screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D);
736 const auto& params{surface->GetSurfaceParams()}; 805 screen_info.width = image_view->size.width;
737 ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); 806 screen_info.height = image_view->size.height;
738 ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); 807 screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
739
740 screen_info.image = &surface->GetImage();
741 screen_info.width = params.width;
742 screen_info.height = params.height;
743 screen_info.is_srgb = surface->GetSurfaceParams().srgb_conversion;
744 return true; 808 return true;
745} 809}
746 810
@@ -765,103 +829,6 @@ void RasterizerVulkan::FlushWork() {
765 draw_counter = 0; 829 draw_counter = 0;
766} 830}
767 831
768RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) {
769 MICROPROFILE_SCOPE(Vulkan_RenderTargets);
770
771 const auto& regs = maxwell3d.regs;
772 auto& dirty = maxwell3d.dirty.flags;
773 const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets];
774 dirty[VideoCommon::Dirty::RenderTargets] = false;
775
776 texture_cache.GuardRenderTargets(true);
777
778 Texceptions texceptions;
779 for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
780 if (update_rendertargets) {
781 const bool preserve_contents = HasToPreserveColorContents(is_clear, regs);
782 color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents);
783 }
784 if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
785 texceptions[rt] = true;
786 }
787 }
788
789 if (update_rendertargets) {
790 const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs);
791 zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents);
792 }
793 if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
794 texceptions[ZETA_TEXCEPTION_INDEX] = true;
795 }
796
797 texture_cache.GuardRenderTargets(false);
798
799 return texceptions;
800}
801
802bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachment) {
803 bool overlap = false;
804 for (auto& [view, layout] : sampled_views) {
805 if (!attachment.IsSameSurface(*view)) {
806 continue;
807 }
808 overlap = true;
809 *layout = VK_IMAGE_LAYOUT_GENERAL;
810 }
811 return overlap;
812}
813
814std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
815 VkRenderPass renderpass) {
816 FramebufferCacheKey key{
817 .renderpass = renderpass,
818 .width = std::numeric_limits<u32>::max(),
819 .height = std::numeric_limits<u32>::max(),
820 .layers = std::numeric_limits<u32>::max(),
821 .views = {},
822 };
823
824 const auto try_push = [&key](const View& view) {
825 if (!view) {
826 return false;
827 }
828 key.views.push_back(view->GetAttachment());
829 key.width = std::min(key.width, view->GetWidth());
830 key.height = std::min(key.height, view->GetHeight());
831 key.layers = std::min(key.layers, view->GetNumLayers());
832 return true;
833 };
834
835 const auto& regs = maxwell3d.regs;
836 const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
837 for (std::size_t index = 0; index < num_attachments; ++index) {
838 if (try_push(color_attachments[index])) {
839 texture_cache.MarkColorBufferInUse(index);
840 }
841 }
842 if (try_push(zeta_attachment)) {
843 texture_cache.MarkDepthBufferInUse();
844 }
845
846 const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key);
847 auto& framebuffer = fbentry->second;
848 if (is_cache_miss) {
849 framebuffer = device.GetLogical().CreateFramebuffer({
850 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
851 .pNext = nullptr,
852 .flags = 0,
853 .renderPass = key.renderpass,
854 .attachmentCount = static_cast<u32>(key.views.size()),
855 .pAttachments = key.views.data(),
856 .width = key.width,
857 .height = key.height,
858 .layers = key.layers,
859 });
860 }
861
862 return {*framebuffer, VkExtent2D{key.width, key.height}};
863}
864
865RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, 832RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state,
866 BufferBindings& buffer_bindings, 833 BufferBindings& buffer_bindings,
867 bool is_indexed, 834 bool is_indexed,
@@ -885,50 +852,37 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt
885 852
886void RasterizerVulkan::SetupShaderDescriptors( 853void RasterizerVulkan::SetupShaderDescriptors(
887 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { 854 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
888 texture_cache.GuardSamplers(true); 855 image_view_indices.clear();
889 856 sampler_handles.clear();
890 for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { 857 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
891 // Skip VertexA stage
892 Shader* const shader = shaders[stage + 1]; 858 Shader* const shader = shaders[stage + 1];
893 if (!shader) { 859 if (!shader) {
894 continue; 860 continue;
895 } 861 }
896 const auto& entries = shader->GetEntries(); 862 const auto& entries = shader->GetEntries();
897 SetupGraphicsConstBuffers(entries, stage);
898 SetupGraphicsGlobalBuffers(entries, stage);
899 SetupGraphicsUniformTexels(entries, stage); 863 SetupGraphicsUniformTexels(entries, stage);
900 SetupGraphicsTextures(entries, stage); 864 SetupGraphicsTextures(entries, stage);
901 SetupGraphicsStorageTexels(entries, stage); 865 SetupGraphicsStorageTexels(entries, stage);
902 SetupGraphicsImages(entries, stage); 866 SetupGraphicsImages(entries, stage);
903 } 867 }
904 texture_cache.GuardSamplers(false); 868 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
905} 869 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
906 870
907void RasterizerVulkan::SetupImageTransitions(Texceptions texceptions, const ColorAttachments& color, 871 update_descriptor_queue.Acquire();
908 const ZetaAttachment& zeta) {
909 TransitionImages(sampled_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_ACCESS_SHADER_READ_BIT);
910 TransitionImages(image_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
911 VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
912 872
913 for (std::size_t rt = 0; rt < color.size(); ++rt) { 873 ImageViewId* image_view_id_ptr = image_view_ids.data();
914 const auto color_attachment = color[rt]; 874 VkSampler* sampler_ptr = sampler_handles.data();
915 if (color_attachment == nullptr) { 875 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
876 // Skip VertexA stage
877 Shader* const shader = shaders[stage + 1];
878 if (!shader) {
916 continue; 879 continue;
917 } 880 }
918 const auto image_layout = 881 const auto& entries = shader->GetEntries();
919 texceptions[rt] ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; 882 SetupGraphicsConstBuffers(entries, stage);
920 color_attachment->Transition(image_layout, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 883 SetupGraphicsGlobalBuffers(entries, stage);
921 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | 884 PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
922 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT); 885 sampler_ptr);
923 }
924
925 if (zeta != nullptr) {
926 const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX]
927 ? VK_IMAGE_LAYOUT_GENERAL
928 : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
929 zeta->Transition(image_layout, VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
930 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
931 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
932 } 886 }
933} 887}
934 888
@@ -1000,7 +954,7 @@ void RasterizerVulkan::EndTransformFeedback() {
1000void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { 954void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {
1001 const auto& regs = maxwell3d.regs; 955 const auto& regs = maxwell3d.regs;
1002 956
1003 for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { 957 for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
1004 const auto& vertex_array = regs.vertex_array[index]; 958 const auto& vertex_array = regs.vertex_array[index];
1005 if (!vertex_array.IsEnabled()) { 959 if (!vertex_array.IsEnabled()) {
1006 continue; 960 continue;
@@ -1009,7 +963,7 @@ void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {
1009 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; 963 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
1010 964
1011 ASSERT(end >= start); 965 ASSERT(end >= start);
1012 const std::size_t size = end - start; 966 const size_t size = end - start;
1013 if (size == 0) { 967 if (size == 0) {
1014 buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0); 968 buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0);
1015 continue; 969 continue;
@@ -1070,7 +1024,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
1070 } 1024 }
1071} 1025}
1072 1026
1073void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) { 1027void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, size_t stage) {
1074 MICROPROFILE_SCOPE(Vulkan_ConstBuffers); 1028 MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
1075 const auto& shader_stage = maxwell3d.state.shader_stages[stage]; 1029 const auto& shader_stage = maxwell3d.state.shader_stages[stage];
1076 for (const auto& entry : entries.const_buffers) { 1030 for (const auto& entry : entries.const_buffers) {
@@ -1078,7 +1032,7 @@ void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, s
1078 } 1032 }
1079} 1033}
1080 1034
1081void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) { 1035void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, size_t stage) {
1082 MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); 1036 MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
1083 const auto& cbufs{maxwell3d.state.shader_stages[stage]}; 1037 const auto& cbufs{maxwell3d.state.shader_stages[stage]};
1084 1038
@@ -1088,37 +1042,49 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries,
1088 } 1042 }
1089} 1043}
1090 1044
1091void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) { 1045void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) {
1092 MICROPROFILE_SCOPE(Vulkan_Textures); 1046 MICROPROFILE_SCOPE(Vulkan_Textures);
1047 const auto& regs = maxwell3d.regs;
1048 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1093 for (const auto& entry : entries.uniform_texels) { 1049 for (const auto& entry : entries.uniform_texels) {
1094 const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; 1050 const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
1095 SetupUniformTexels(image, entry); 1051 image_view_indices.push_back(handle.image);
1096 } 1052 }
1097} 1053}
1098 1054
1099void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) { 1055void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) {
1100 MICROPROFILE_SCOPE(Vulkan_Textures); 1056 MICROPROFILE_SCOPE(Vulkan_Textures);
1057 const auto& regs = maxwell3d.regs;
1058 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1101 for (const auto& entry : entries.samplers) { 1059 for (const auto& entry : entries.samplers) {
1102 for (std::size_t i = 0; i < entry.size; ++i) { 1060 for (size_t index = 0; index < entry.size; ++index) {
1103 const auto texture = GetTextureInfo(maxwell3d, entry, stage, i); 1061 const TextureHandle handle =
1104 SetupTexture(texture, entry); 1062 GetTextureInfo(maxwell3d, via_header_index, entry, stage, index);
1063 image_view_indices.push_back(handle.image);
1064
1065 Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
1066 sampler_handles.push_back(sampler->Handle());
1105 } 1067 }
1106 } 1068 }
1107} 1069}
1108 1070
1109void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) { 1071void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) {
1110 MICROPROFILE_SCOPE(Vulkan_Textures); 1072 MICROPROFILE_SCOPE(Vulkan_Textures);
1073 const auto& regs = maxwell3d.regs;
1074 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1111 for (const auto& entry : entries.storage_texels) { 1075 for (const auto& entry : entries.storage_texels) {
1112 const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; 1076 const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
1113 SetupStorageTexel(image, entry); 1077 image_view_indices.push_back(handle.image);
1114 } 1078 }
1115} 1079}
1116 1080
1117void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { 1081void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) {
1118 MICROPROFILE_SCOPE(Vulkan_Images); 1082 MICROPROFILE_SCOPE(Vulkan_Images);
1083 const auto& regs = maxwell3d.regs;
1084 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
1119 for (const auto& entry : entries.images) { 1085 for (const auto& entry : entries.images) {
1120 const auto tic = GetTextureInfo(maxwell3d, entry, stage).tic; 1086 const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
1121 SetupImage(tic, entry); 1087 image_view_indices.push_back(handle.image);
1122 } 1088 }
1123} 1089}
1124 1090
@@ -1128,11 +1094,12 @@ void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
1128 for (const auto& entry : entries.const_buffers) { 1094 for (const auto& entry : entries.const_buffers) {
1129 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; 1095 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
1130 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); 1096 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
1131 Tegra::Engines::ConstBufferInfo buffer; 1097 const Tegra::Engines::ConstBufferInfo info{
1132 buffer.address = config.Address(); 1098 .address = config.Address(),
1133 buffer.size = config.size; 1099 .size = config.size,
1134 buffer.enabled = mask[entry.GetIndex()]; 1100 .enabled = mask[entry.GetIndex()],
1135 SetupConstBuffer(entry, buffer); 1101 };
1102 SetupConstBuffer(entry, info);
1136 } 1103 }
1137} 1104}
1138 1105
@@ -1147,35 +1114,46 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
1147 1114
1148void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { 1115void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
1149 MICROPROFILE_SCOPE(Vulkan_Textures); 1116 MICROPROFILE_SCOPE(Vulkan_Textures);
1117 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1150 for (const auto& entry : entries.uniform_texels) { 1118 for (const auto& entry : entries.uniform_texels) {
1151 const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; 1119 const TextureHandle handle =
1152 SetupUniformTexels(image, entry); 1120 GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
1121 image_view_indices.push_back(handle.image);
1153 } 1122 }
1154} 1123}
1155 1124
1156void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { 1125void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
1157 MICROPROFILE_SCOPE(Vulkan_Textures); 1126 MICROPROFILE_SCOPE(Vulkan_Textures);
1127 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1158 for (const auto& entry : entries.samplers) { 1128 for (const auto& entry : entries.samplers) {
1159 for (std::size_t i = 0; i < entry.size; ++i) { 1129 for (size_t index = 0; index < entry.size; ++index) {
1160 const auto texture = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex, i); 1130 const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry,
1161 SetupTexture(texture, entry); 1131 COMPUTE_SHADER_INDEX, index);
1132 image_view_indices.push_back(handle.image);
1133
1134 Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
1135 sampler_handles.push_back(sampler->Handle());
1162 } 1136 }
1163 } 1137 }
1164} 1138}
1165 1139
1166void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { 1140void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
1167 MICROPROFILE_SCOPE(Vulkan_Textures); 1141 MICROPROFILE_SCOPE(Vulkan_Textures);
1142 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1168 for (const auto& entry : entries.storage_texels) { 1143 for (const auto& entry : entries.storage_texels) {
1169 const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; 1144 const TextureHandle handle =
1170 SetupStorageTexel(image, entry); 1145 GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
1146 image_view_indices.push_back(handle.image);
1171 } 1147 }
1172} 1148}
1173 1149
1174void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { 1150void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
1175 MICROPROFILE_SCOPE(Vulkan_Images); 1151 MICROPROFILE_SCOPE(Vulkan_Images);
1152 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
1176 for (const auto& entry : entries.images) { 1153 for (const auto& entry : entries.images) {
1177 const auto tic = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; 1154 const TextureHandle handle =
1178 SetupImage(tic, entry); 1155 GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
1156 image_view_indices.push_back(handle.image);
1179 } 1157 }
1180} 1158}
1181 1159
@@ -1186,14 +1164,12 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
1186 update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE); 1164 update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE);
1187 return; 1165 return;
1188 } 1166 }
1189
1190 // Align the size to avoid bad std140 interactions 1167 // Align the size to avoid bad std140 interactions
1191 const std::size_t size = 1168 const size_t size = Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
1192 Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
1193 ASSERT(size <= MaxConstbufferSize); 1169 ASSERT(size <= MaxConstbufferSize);
1194 1170
1195 const auto info = 1171 const u64 alignment = device.GetUniformBufferAlignment();
1196 buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); 1172 const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment);
1197 update_descriptor_queue.AddBuffer(info.handle, info.offset, size); 1173 update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
1198} 1174}
1199 1175
@@ -1206,7 +1182,7 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
1206 // because Vulkan doesn't like empty buffers. 1182 // because Vulkan doesn't like empty buffers.
1207 // Note: Do *not* use DefaultBuffer() here, storage buffers can be written to, breaking the 1183 // Note: Do *not* use DefaultBuffer() here, storage buffers can be written to, breaking the
1208 // default buffer. 1184 // default buffer.
1209 static constexpr std::size_t dummy_size = 4; 1185 static constexpr size_t dummy_size = 4;
1210 const auto info = buffer_cache.GetEmptyBuffer(dummy_size); 1186 const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
1211 update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size); 1187 update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
1212 return; 1188 return;
@@ -1217,55 +1193,6 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
1217 update_descriptor_queue.AddBuffer(info.handle, info.offset, size); 1193 update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
1218} 1194}
1219 1195
1220void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,
1221 const UniformTexelEntry& entry) {
1222 const auto view = texture_cache.GetTextureSurface(tic, entry);
1223 ASSERT(view->IsBufferView());
1224
1225 update_descriptor_queue.AddTexelBuffer(view->GetBufferView());
1226}
1227
1228void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& texture,
1229 const SamplerEntry& entry) {
1230 auto view = texture_cache.GetTextureSurface(texture.tic, entry);
1231 ASSERT(!view->IsBufferView());
1232
1233 const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source,
1234 texture.tic.z_source, texture.tic.w_source);
1235 const auto sampler = sampler_cache.GetSampler(texture.tsc);
1236 update_descriptor_queue.AddSampledImage(sampler, image_view);
1237
1238 VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
1239 *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
1240 sampled_views.push_back(ImageView{std::move(view), image_layout});
1241}
1242
1243void RasterizerVulkan::SetupStorageTexel(const Tegra::Texture::TICEntry& tic,
1244 const StorageTexelEntry& entry) {
1245 const auto view = texture_cache.GetImageSurface(tic, entry);
1246 ASSERT(view->IsBufferView());
1247
1248 update_descriptor_queue.AddTexelBuffer(view->GetBufferView());
1249}
1250
1251void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) {
1252 auto view = texture_cache.GetImageSurface(tic, entry);
1253
1254 if (entry.is_written) {
1255 view->MarkAsModified(texture_cache.Tick());
1256 }
1257
1258 UNIMPLEMENTED_IF(tic.IsBuffer());
1259
1260 const VkImageView image_view =
1261 view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
1262 update_descriptor_queue.AddImage(image_view);
1263
1264 VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
1265 *image_layout = VK_IMAGE_LAYOUT_GENERAL;
1266 image_views.push_back(ImageView{std::move(view), image_layout});
1267}
1268
1269void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { 1196void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
1270 if (!state_tracker.TouchViewports()) { 1197 if (!state_tracker.TouchViewports()) {
1271 return; 1198 return;
@@ -1457,8 +1384,8 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
1457 }); 1384 });
1458} 1385}
1459 1386
1460std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { 1387size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const {
1461 std::size_t size = CalculateVertexArraysSize(); 1388 size_t size = CalculateVertexArraysSize();
1462 if (is_indexed) { 1389 if (is_indexed) {
1463 size = Common::AlignUp(size, 4) + CalculateIndexBufferSize(); 1390 size = Common::AlignUp(size, 4) + CalculateIndexBufferSize();
1464 } 1391 }
@@ -1466,15 +1393,15 @@ std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed)
1466 return size; 1393 return size;
1467} 1394}
1468 1395
1469std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { 1396size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const {
1470 return Tegra::Engines::KeplerCompute::NumConstBuffers * 1397 return Tegra::Engines::KeplerCompute::NumConstBuffers *
1471 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); 1398 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
1472} 1399}
1473 1400
1474std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { 1401size_t RasterizerVulkan::CalculateVertexArraysSize() const {
1475 const auto& regs = maxwell3d.regs; 1402 const auto& regs = maxwell3d.regs;
1476 1403
1477 std::size_t size = 0; 1404 size_t size = 0;
1478 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 1405 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
1479 // This implementation assumes that all attributes are used in the shader. 1406 // This implementation assumes that all attributes are used in the shader.
1480 const GPUVAddr start{regs.vertex_array[index].StartAddress()}; 1407 const GPUVAddr start{regs.vertex_array[index].StartAddress()};
@@ -1486,12 +1413,12 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
1486 return size; 1413 return size;
1487} 1414}
1488 1415
1489std::size_t RasterizerVulkan::CalculateIndexBufferSize() const { 1416size_t RasterizerVulkan::CalculateIndexBufferSize() const {
1490 return static_cast<std::size_t>(maxwell3d.regs.index_array.count) * 1417 return static_cast<size_t>(maxwell3d.regs.index_array.count) *
1491 static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); 1418 static_cast<size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
1492} 1419}
1493 1420
1494std::size_t RasterizerVulkan::CalculateConstBufferSize( 1421size_t RasterizerVulkan::CalculateConstBufferSize(
1495 const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const { 1422 const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const {
1496 if (entry.IsIndirect()) { 1423 if (entry.IsIndirect()) {
1497 // Buffer is accessed indirectly, so upload the entire thing 1424 // Buffer is accessed indirectly, so upload the entire thing
@@ -1502,37 +1429,10 @@ std::size_t RasterizerVulkan::CalculateConstBufferSize(
1502 } 1429 }
1503} 1430}
1504 1431
1505RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const {
1506 const auto& regs = maxwell3d.regs;
1507 const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
1508
1509 RenderPassParams params;
1510 params.color_formats = {};
1511 std::size_t color_texceptions = 0;
1512
1513 std::size_t index = 0;
1514 for (std::size_t rt = 0; rt < num_attachments; ++rt) {
1515 const auto& rendertarget = regs.rt[rt];
1516 if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) {
1517 continue;
1518 }
1519 params.color_formats[index] = static_cast<u8>(rendertarget.format);
1520 color_texceptions |= (texceptions[rt] ? 1ULL : 0ULL) << index;
1521 ++index;
1522 }
1523 params.num_color_attachments = static_cast<u8>(index);
1524 params.texceptions = static_cast<u8>(color_texceptions);
1525
1526 params.zeta_format = regs.zeta_enable ? static_cast<u8>(regs.zeta.format) : 0;
1527 params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX];
1528 return params;
1529}
1530
1531VkBuffer RasterizerVulkan::DefaultBuffer() { 1432VkBuffer RasterizerVulkan::DefaultBuffer() {
1532 if (default_buffer) { 1433 if (default_buffer) {
1533 return *default_buffer; 1434 return *default_buffer;
1534 } 1435 }
1535
1536 default_buffer = device.GetLogical().CreateBuffer({ 1436 default_buffer = device.GetLogical().CreateBuffer({
1537 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 1437 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
1538 .pNext = nullptr, 1438 .pNext = nullptr,
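A note on the SetupConstBuffer hunk above: the uploaded size is rounded up to a whole number of GLvec4s (16 bytes) so a short constbuffer never ends mid-vector under std140 layout. A minimal sketch of that helper, assuming Common::AlignUp has the usual round-up-to-multiple semantics:

#include <cstddef>

// Round value up to the next multiple of align (align must be non-zero).
// Common::AlignUp in yuzu behaves like this for the 16-byte case used above.
constexpr std::size_t AlignUp(std::size_t value, std::size_t align) {
    return ((value + align - 1) / align) * align;
}

static_assert(AlignUp(20, 16) == 32, "a 20-byte constbuffer uploads as 32 bytes");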
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 30ec58eb4..990f9e031 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -11,11 +11,11 @@
11#include <vector> 11#include <vector>
12 12
13#include <boost/container/static_vector.hpp> 13#include <boost/container/static_vector.hpp>
14#include <boost/functional/hash.hpp>
15 14
16#include "common/common_types.h" 15#include "common/common_types.h"
17#include "video_core/rasterizer_accelerated.h" 16#include "video_core/rasterizer_accelerated.h"
18#include "video_core/rasterizer_interface.h" 17#include "video_core/rasterizer_interface.h"
18#include "video_core/renderer_vulkan/blit_image.h"
19#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 19#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
20#include "video_core/renderer_vulkan/vk_buffer_cache.h" 20#include "video_core/renderer_vulkan/vk_buffer_cache.h"
21#include "video_core/renderer_vulkan/vk_compute_pass.h" 21#include "video_core/renderer_vulkan/vk_compute_pass.h"
@@ -24,10 +24,9 @@
24#include "video_core/renderer_vulkan/vk_memory_manager.h" 24#include "video_core/renderer_vulkan/vk_memory_manager.h"
25#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 25#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
26#include "video_core/renderer_vulkan/vk_query_cache.h" 26#include "video_core/renderer_vulkan/vk_query_cache.h"
27#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
28#include "video_core/renderer_vulkan/vk_sampler_cache.h"
29#include "video_core/renderer_vulkan/vk_scheduler.h" 27#include "video_core/renderer_vulkan/vk_scheduler.h"
30#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 28#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
29#include "video_core/renderer_vulkan/vk_stream_buffer.h"
31#include "video_core/renderer_vulkan/vk_texture_cache.h" 30#include "video_core/renderer_vulkan/vk_texture_cache.h"
32#include "video_core/renderer_vulkan/vk_update_descriptor.h" 31#include "video_core/renderer_vulkan/vk_update_descriptor.h"
33#include "video_core/renderer_vulkan/wrapper.h" 32#include "video_core/renderer_vulkan/wrapper.h"
@@ -49,60 +48,9 @@ namespace Vulkan {
49 48
50struct VKScreenInfo; 49struct VKScreenInfo;
51 50
52using ImageViewsPack = boost::container::static_vector<VkImageView, Maxwell::NumRenderTargets + 1>;
53
54struct FramebufferCacheKey {
55 VkRenderPass renderpass{};
56 u32 width = 0;
57 u32 height = 0;
58 u32 layers = 0;
59 ImageViewsPack views;
60
61 std::size_t Hash() const noexcept {
62 std::size_t hash = 0;
63 boost::hash_combine(hash, static_cast<VkRenderPass>(renderpass));
64 for (const auto& view : views) {
65 boost::hash_combine(hash, static_cast<VkImageView>(view));
66 }
67 boost::hash_combine(hash, width);
68 boost::hash_combine(hash, height);
69 boost::hash_combine(hash, layers);
70 return hash;
71 }
72
73 bool operator==(const FramebufferCacheKey& rhs) const noexcept {
74 return std::tie(renderpass, views, width, height, layers) ==
75 std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height, rhs.layers);
76 }
77
78 bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
79 return !operator==(rhs);
80 }
81};
82
83} // namespace Vulkan
84
85namespace std {
86
87template <>
88struct hash<Vulkan::FramebufferCacheKey> {
89 std::size_t operator()(const Vulkan::FramebufferCacheKey& k) const noexcept {
90 return k.Hash();
91 }
92};
93
94} // namespace std
95
96namespace Vulkan {
97
98class StateTracker; 51class StateTracker;
99class BufferBindings; 52class BufferBindings;
100 53
101struct ImageView {
102 View view;
103 VkImageLayout* layout = nullptr;
104};
105
106class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { 54class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
107public: 55public:
108 explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, 56 explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
@@ -123,15 +71,18 @@ public:
123 void InvalidateRegion(VAddr addr, u64 size) override; 71 void InvalidateRegion(VAddr addr, u64 size) override;
124 void OnCPUWrite(VAddr addr, u64 size) override; 72 void OnCPUWrite(VAddr addr, u64 size) override;
125 void SyncGuestHost() override; 73 void SyncGuestHost() override;
74 void UnmapMemory(VAddr addr, u64 size) override;
126 void SignalSemaphore(GPUVAddr addr, u32 value) override; 75 void SignalSemaphore(GPUVAddr addr, u32 value) override;
127 void SignalSyncPoint(u32 value) override; 76 void SignalSyncPoint(u32 value) override;
128 void ReleaseFences() override; 77 void ReleaseFences() override;
129 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 78 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
130 void WaitForIdle() override; 79 void WaitForIdle() override;
80 void FragmentBarrier() override;
81 void TiledCacheBarrier() override;
131 void FlushCommands() override; 82 void FlushCommands() override;
132 void TickFrame() override; 83 void TickFrame() override;
133 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 84 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
134 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 85 const Tegra::Engines::Fermi2D::Surface& dst,
135 const Tegra::Engines::Fermi2D::Config& copy_config) override; 86 const Tegra::Engines::Fermi2D::Config& copy_config) override;
136 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 87 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
137 u32 pixel_stride) override; 88 u32 pixel_stride) override;
@@ -145,11 +96,17 @@ public:
145 } 96 }
146 97
147 /// Maximum supported size that a constbuffer can have in bytes. 98 /// Maximum supported size that a constbuffer can have in bytes.
148 static constexpr std::size_t MaxConstbufferSize = 0x10000; 99 static constexpr size_t MaxConstbufferSize = 0x10000;
149 static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, 100 static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
150 "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); 101 "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
151 102
152private: 103private:
104 static constexpr size_t MAX_TEXTURES = 192;
105 static constexpr size_t MAX_IMAGES = 48;
106 static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
107
108 static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
109
153 struct DrawParameters { 110 struct DrawParameters {
154 void Draw(vk::CommandBuffer cmdbuf) const; 111 void Draw(vk::CommandBuffer cmdbuf) const;
155 112
@@ -160,23 +117,8 @@ private:
160 bool is_indexed = 0; 117 bool is_indexed = 0;
161 }; 118 };
162 119
163 using ColorAttachments = std::array<View, Maxwell::NumRenderTargets>;
164 using ZetaAttachment = View;
165
166 using Texceptions = std::bitset<Maxwell::NumRenderTargets + 1>;
167
168 static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8;
169 static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
170
171 void FlushWork(); 120 void FlushWork();
172 121
173 /// @brief Updates the currently bound attachments
174 /// @param is_clear True when the framebuffer is updated as a clear
175 /// @return Bitfield of attachments being used as sampled textures
176 Texceptions UpdateAttachments(bool is_clear);
177
178 std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass);
179
180 /// Setups geometry buffers and state. 122 /// Setups geometry buffers and state.
181 DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, 123 DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
182 bool is_indexed, bool is_instanced); 124 bool is_indexed, bool is_instanced);
@@ -184,17 +126,12 @@ private:
184 /// Setup descriptors in the graphics pipeline. 126 /// Setup descriptors in the graphics pipeline.
185 void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders); 127 void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
186 128
187 void SetupImageTransitions(Texceptions texceptions, const ColorAttachments& color,
188 const ZetaAttachment& zeta);
189
190 void UpdateDynamicStates(); 129 void UpdateDynamicStates();
191 130
192 void BeginTransformFeedback(); 131 void BeginTransformFeedback();
193 132
194 void EndTransformFeedback(); 133 void EndTransformFeedback();
195 134
196 bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
197
198 void SetupVertexArrays(BufferBindings& buffer_bindings); 135 void SetupVertexArrays(BufferBindings& buffer_bindings);
199 136
200 void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); 137 void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
@@ -240,14 +177,6 @@ private:
240 177
241 void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); 178 void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
242 179
243 void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry);
244
245 void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry);
246
247 void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry);
248
249 void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
250
251 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); 180 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
252 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); 181 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
253 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); 182 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -264,18 +193,16 @@ private:
264 void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); 193 void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
265 void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); 194 void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
266 195
267 std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; 196 size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
268
269 std::size_t CalculateComputeStreamBufferSize() const;
270 197
271 std::size_t CalculateVertexArraysSize() const; 198 size_t CalculateComputeStreamBufferSize() const;
272 199
273 std::size_t CalculateIndexBufferSize() const; 200 size_t CalculateVertexArraysSize() const;
274 201
275 std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry, 202 size_t CalculateIndexBufferSize() const;
276 const Tegra::Engines::ConstBufferInfo& buffer) const;
277 203
278 RenderPassParams GetRenderPassParams(Texceptions texceptions) const; 204 size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
205 const Tegra::Engines::ConstBufferInfo& buffer) const;
279 206
280 VkBuffer DefaultBuffer(); 207 VkBuffer DefaultBuffer();
281 208
@@ -290,18 +217,19 @@ private:
290 StateTracker& state_tracker; 217 StateTracker& state_tracker;
291 VKScheduler& scheduler; 218 VKScheduler& scheduler;
292 219
220 VKStreamBuffer stream_buffer;
293 VKStagingBufferPool staging_pool; 221 VKStagingBufferPool staging_pool;
294 VKDescriptorPool descriptor_pool; 222 VKDescriptorPool descriptor_pool;
295 VKUpdateDescriptorQueue update_descriptor_queue; 223 VKUpdateDescriptorQueue update_descriptor_queue;
296 VKRenderPassCache renderpass_cache; 224 BlitImageHelper blit_image;
297 QuadArrayPass quad_array_pass; 225 QuadArrayPass quad_array_pass;
298 QuadIndexedPass quad_indexed_pass; 226 QuadIndexedPass quad_indexed_pass;
299 Uint8Pass uint8_pass; 227 Uint8Pass uint8_pass;
300 228
301 VKTextureCache texture_cache; 229 TextureCacheRuntime texture_cache_runtime;
230 TextureCache texture_cache;
302 VKPipelineCache pipeline_cache; 231 VKPipelineCache pipeline_cache;
303 VKBufferCache buffer_cache; 232 VKBufferCache buffer_cache;
304 VKSamplerCache sampler_cache;
305 VKQueryCache query_cache; 233 VKQueryCache query_cache;
306 VKFenceManager fence_manager; 234 VKFenceManager fence_manager;
307 235
@@ -310,16 +238,11 @@ private:
310 vk::Event wfi_event; 238 vk::Event wfi_event;
311 VideoCommon::Shader::AsyncShaders async_shaders; 239 VideoCommon::Shader::AsyncShaders async_shaders;
312 240
313 ColorAttachments color_attachments; 241 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
314 ZetaAttachment zeta_attachment; 242 std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
315 243 boost::container::static_vector<VkSampler, MAX_TEXTURES> sampler_handles;
316 std::vector<ImageView> sampled_views;
317 std::vector<ImageView> image_views;
318 244
319 u32 draw_counter = 0; 245 u32 draw_counter = 0;
320
321 // TODO(Rodrigo): Invalidate on image destruction
322 std::unordered_map<FramebufferCacheKey, vk::Framebuffer> framebuffer_cache;
323}; 246};
324 247
325} // namespace Vulkan 248} // namespace Vulkan
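The header above replaces the heap-backed sampled_views/image_views vectors with fixed-capacity containers sized by MAX_IMAGE_VIEWS. A reduced sketch of the pattern, with a hypothetical BeginDraw helper, showing why boost::container::static_vector suits a per-draw hot path:

#include <boost/container/static_vector.hpp>
#include <cstddef>
#include <cstdint>

constexpr std::size_t MAX_TEXTURES = 192;
constexpr std::size_t MAX_IMAGES = 48;
constexpr std::size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;

// Capacity is part of the type, so push_back never allocates; overflowing it
// is a logic error rather than a silent reallocation.
boost::container::static_vector<std::uint32_t, MAX_IMAGE_VIEWS> image_view_indices;

void BeginDraw() {
    image_view_indices.clear();      // storage is reused every draw, nothing is freed
    image_view_indices.push_back(0); // hypothetical image view index
}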
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
deleted file mode 100644
index e812c7dd6..000000000
--- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
+++ /dev/null
@@ -1,158 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <memory>
7#include <vector>
8
9#include "common/cityhash.h"
10#include "video_core/engines/maxwell_3d.h"
11#include "video_core/renderer_vulkan/maxwell_to_vk.h"
12#include "video_core/renderer_vulkan/vk_device.h"
13#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
14#include "video_core/renderer_vulkan/wrapper.h"
15
16namespace Vulkan {
17
18std::size_t RenderPassParams::Hash() const noexcept {
19 const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
20 return static_cast<std::size_t>(hash);
21}
22
23bool RenderPassParams::operator==(const RenderPassParams& rhs) const noexcept {
24 return std::memcmp(&rhs, this, sizeof *this) == 0;
25}
26
27VKRenderPassCache::VKRenderPassCache(const VKDevice& device_) : device{device_} {}
28
29VKRenderPassCache::~VKRenderPassCache() = default;
30
31VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) {
32 const auto [pair, is_cache_miss] = cache.try_emplace(params);
33 auto& entry = pair->second;
34 if (is_cache_miss) {
35 entry = CreateRenderPass(params);
36 }
37 return *entry;
38}
39
40vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const {
41 using namespace VideoCore::Surface;
42 const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments);
43
44 std::vector<VkAttachmentDescription> descriptors;
45 descriptors.reserve(num_attachments);
46
47 std::vector<VkAttachmentReference> color_references;
48 color_references.reserve(num_attachments);
49
50 for (std::size_t rt = 0; rt < num_attachments; ++rt) {
51 const auto guest_format = static_cast<Tegra::RenderTargetFormat>(params.color_formats[rt]);
52 const PixelFormat pixel_format = PixelFormatFromRenderTargetFormat(guest_format);
53 const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format);
54 ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
55 static_cast<int>(pixel_format));
56
57 // TODO(Rodrigo): Add MAY_ALIAS_BIT when it's needed.
58 const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0
59 ? VK_IMAGE_LAYOUT_GENERAL
60 : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
61 descriptors.push_back({
62 .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
63 .format = format.format,
64 .samples = VK_SAMPLE_COUNT_1_BIT,
65 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
66 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
67 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
68 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
69 .initialLayout = color_layout,
70 .finalLayout = color_layout,
71 });
72
73 color_references.push_back({
74 .attachment = static_cast<u32>(rt),
75 .layout = color_layout,
76 });
77 }
78
79 VkAttachmentReference zeta_attachment_ref;
80 const bool has_zeta = params.zeta_format != 0;
81 if (has_zeta) {
82 const auto guest_format = static_cast<Tegra::DepthFormat>(params.zeta_format);
83 const PixelFormat pixel_format = PixelFormatFromDepthFormat(guest_format);
84 const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format);
85 ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
86 static_cast<int>(pixel_format));
87
88 const VkImageLayout zeta_layout = params.zeta_texception != 0
89 ? VK_IMAGE_LAYOUT_GENERAL
90 : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
91 descriptors.push_back({
92 .flags = 0,
93 .format = format.format,
94 .samples = VK_SAMPLE_COUNT_1_BIT,
95 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
96 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
97 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
98 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
99 .initialLayout = zeta_layout,
100 .finalLayout = zeta_layout,
101 });
102
103 zeta_attachment_ref = {
104 .attachment = static_cast<u32>(num_attachments),
105 .layout = zeta_layout,
106 };
107 }
108
109 const VkSubpassDescription subpass_description{
110 .flags = 0,
111 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
112 .inputAttachmentCount = 0,
113 .pInputAttachments = nullptr,
114 .colorAttachmentCount = static_cast<u32>(color_references.size()),
115 .pColorAttachments = color_references.data(),
116 .pResolveAttachments = nullptr,
117 .pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr,
118 .preserveAttachmentCount = 0,
119 .pPreserveAttachments = nullptr,
120 };
121
122 VkAccessFlags access = 0;
123 VkPipelineStageFlags stage = 0;
124 if (!color_references.empty()) {
125 access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
126 stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
127 }
128
129 if (has_zeta) {
130 access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
131 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
132 stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
133 }
134
135 const VkSubpassDependency subpass_dependency{
136 .srcSubpass = VK_SUBPASS_EXTERNAL,
137 .dstSubpass = 0,
138 .srcStageMask = stage,
139 .dstStageMask = stage,
140 .srcAccessMask = 0,
141 .dstAccessMask = access,
142 .dependencyFlags = 0,
143 };
144
145 return device.GetLogical().CreateRenderPass({
146 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
147 .pNext = nullptr,
148 .flags = 0,
149 .attachmentCount = static_cast<u32>(descriptors.size()),
150 .pAttachments = descriptors.data(),
151 .subpassCount = 1,
152 .pSubpasses = &subpass_description,
153 .dependencyCount = 1,
154 .pDependencies = &subpass_dependency,
155 });
156}
157
158} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h
deleted file mode 100644
index 652ecef7b..000000000
--- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h
+++ /dev/null
@@ -1,70 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <type_traits>
8#include <unordered_map>
9
10#include <boost/container/static_vector.hpp>
11#include <boost/functional/hash.hpp>
12
13#include "video_core/engines/maxwell_3d.h"
14#include "video_core/renderer_vulkan/wrapper.h"
15#include "video_core/surface.h"
16
17namespace Vulkan {
18
19class VKDevice;
20
21struct RenderPassParams {
22 std::array<u8, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_formats;
23 u8 num_color_attachments;
24 u8 texceptions;
25
26 u8 zeta_format;
27 u8 zeta_texception;
28
29 std::size_t Hash() const noexcept;
30
31 bool operator==(const RenderPassParams& rhs) const noexcept;
32
33 bool operator!=(const RenderPassParams& rhs) const noexcept {
34 return !operator==(rhs);
35 }
36};
37static_assert(std::has_unique_object_representations_v<RenderPassParams>);
38static_assert(std::is_trivially_copyable_v<RenderPassParams>);
39static_assert(std::is_trivially_constructible_v<RenderPassParams>);
40
41} // namespace Vulkan
42
43namespace std {
44
45template <>
46struct hash<Vulkan::RenderPassParams> {
47 std::size_t operator()(const Vulkan::RenderPassParams& k) const noexcept {
48 return k.Hash();
49 }
50};
51
52} // namespace std
53
54namespace Vulkan {
55
56class VKRenderPassCache final {
57public:
58 explicit VKRenderPassCache(const VKDevice& device_);
59 ~VKRenderPassCache();
60
61 VkRenderPass GetRenderPass(const RenderPassParams& params);
62
63private:
64 vk::RenderPass CreateRenderPass(const RenderPassParams& params) const;
65
66 const VKDevice& device;
67 std::unordered_map<RenderPassParams, vk::RenderPass> cache;
68};
69
70} // namespace Vulkan
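For readers unfamiliar with the idiom the deleted render pass cache relied on: a trivially-copyable key struct exposes Hash(), and a std::hash specialization forwards to it so the key works directly in std::unordered_map. A self-contained sketch with placeholder fields:

#include <cstddef>
#include <functional>
#include <unordered_map>

struct Key {
    int width = 0;
    int height = 0;

    std::size_t Hash() const noexcept {
        std::size_t hash = std::hash<int>{}(width);
        // boost::hash_combine-style mixing of the second field
        hash ^= std::hash<int>{}(height) + 0x9e3779b9 + (hash << 6) + (hash >> 2);
        return hash;
    }

    bool operator==(const Key& rhs) const noexcept {
        return width == rhs.width && height == rhs.height;
    }
};

template <>
struct std::hash<Key> {
    std::size_t operator()(const Key& key) const noexcept { return key.Hash(); }
};

std::unordered_map<Key, int> cache; // no custom hasher argument needed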
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
deleted file mode 100644
index b859691fa..000000000
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ /dev/null
@@ -1,83 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <unordered_map>
6
7#include "video_core/renderer_vulkan/maxwell_to_vk.h"
8#include "video_core/renderer_vulkan/vk_sampler_cache.h"
9#include "video_core/renderer_vulkan/wrapper.h"
10#include "video_core/textures/texture.h"
11
12using Tegra::Texture::TextureMipmapFilter;
13
14namespace Vulkan {
15
16namespace {
17
18VkBorderColor ConvertBorderColor(std::array<float, 4> color) {
19 // TODO(Rodrigo): Manage integer border colors
20 if (color == std::array<float, 4>{0, 0, 0, 0}) {
21 return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
22 } else if (color == std::array<float, 4>{0, 0, 0, 1}) {
23 return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
24 } else if (color == std::array<float, 4>{1, 1, 1, 1}) {
25 return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
26 }
27 if (color[0] + color[1] + color[2] > 1.35f) {
28 // If color elements are brighter than roughly 0.5 average, use white border
29 return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
30 } else if (color[3] > 0.5f) {
31 return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
32 } else {
33 return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
34 }
35}
36
37} // Anonymous namespace
38
39VKSamplerCache::VKSamplerCache(const VKDevice& device_) : device{device_} {}
40
41VKSamplerCache::~VKSamplerCache() = default;
42
43vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
44 const bool arbitrary_borders = device.IsExtCustomBorderColorSupported();
45 const std::array color = tsc.GetBorderColor();
46
47 VkSamplerCustomBorderColorCreateInfoEXT border{
48 .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
49 .pNext = nullptr,
50 .customBorderColor = {},
51 .format = VK_FORMAT_UNDEFINED,
52 };
53 std::memcpy(&border.customBorderColor, color.data(), sizeof(color));
54
55 return device.GetLogical().CreateSampler({
56 .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
57 .pNext = arbitrary_borders ? &border : nullptr,
58 .flags = 0,
59 .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter),
60 .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter),
61 .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
62 .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
63 .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
64 .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter),
65 .mipLodBias = tsc.GetLodBias(),
66 .anisotropyEnable =
67 static_cast<VkBool32>(tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE),
68 .maxAnisotropy = tsc.GetMaxAnisotropy(),
69 .compareEnable = tsc.depth_compare_enabled,
70 .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func),
71 .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(),
72 .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(),
73 .borderColor =
74 arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color),
75 .unnormalizedCoordinates = VK_FALSE,
76 });
77}
78
79VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const {
80 return *sampler;
81}
82
83} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h
deleted file mode 100644
index 3f22c4610..000000000
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.h
+++ /dev/null
@@ -1,29 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/renderer_vulkan/wrapper.h"
8#include "video_core/sampler_cache.h"
9#include "video_core/textures/texture.h"
10
11namespace Vulkan {
12
13class VKDevice;
14
15class VKSamplerCache final : public VideoCommon::SamplerCache<VkSampler, vk::Sampler> {
16public:
17 explicit VKSamplerCache(const VKDevice& device_);
18 ~VKSamplerCache();
19
20protected:
21 vk::Sampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
22
23 VkSampler ToSamplerType(const vk::Sampler& sampler) const override;
24
25private:
26 const VKDevice& device;
27};
28
29} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 1a483dc71..c104c6fe3 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -16,6 +16,7 @@
16#include "video_core/renderer_vulkan/vk_query_cache.h" 16#include "video_core/renderer_vulkan/vk_query_cache.h"
17#include "video_core/renderer_vulkan/vk_scheduler.h" 17#include "video_core/renderer_vulkan/vk_scheduler.h"
18#include "video_core/renderer_vulkan/vk_state_tracker.h" 18#include "video_core/renderer_vulkan/vk_state_tracker.h"
19#include "video_core/renderer_vulkan/vk_texture_cache.h"
19#include "video_core/renderer_vulkan/wrapper.h" 20#include "video_core/renderer_vulkan/wrapper.h"
20 21
21namespace Vulkan { 22namespace Vulkan {
@@ -96,38 +97,39 @@ void VKScheduler::DispatchWork() {
96 AcquireNewChunk(); 97 AcquireNewChunk();
97} 98}
98 99
99void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, 100void VKScheduler::RequestRenderpass(const Framebuffer* framebuffer) {
100 VkExtent2D render_area) { 101 const VkRenderPass renderpass = framebuffer->RenderPass();
101 if (renderpass == state.renderpass && framebuffer == state.framebuffer && 102 const VkFramebuffer framebuffer_handle = framebuffer->Handle();
103 const VkExtent2D render_area = framebuffer->RenderArea();
104 if (renderpass == state.renderpass && framebuffer_handle == state.framebuffer &&
102 render_area.width == state.render_area.width && 105 render_area.width == state.render_area.width &&
103 render_area.height == state.render_area.height) { 106 render_area.height == state.render_area.height) {
104 return; 107 return;
105 } 108 }
106 const bool end_renderpass = state.renderpass != nullptr; 109 EndRenderPass();
107 state.renderpass = renderpass; 110 state.renderpass = renderpass;
108 state.framebuffer = framebuffer; 111 state.framebuffer = framebuffer_handle;
109 state.render_area = render_area; 112 state.render_area = render_area;
110 113
111 const VkRenderPassBeginInfo renderpass_bi{ 114 Record([renderpass, framebuffer_handle, render_area](vk::CommandBuffer cmdbuf) {
112 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, 115 const VkRenderPassBeginInfo renderpass_bi{
113 .pNext = nullptr, 116 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
114 .renderPass = renderpass, 117 .pNext = nullptr,
115 .framebuffer = framebuffer, 118 .renderPass = renderpass,
116 .renderArea = 119 .framebuffer = framebuffer_handle,
117 { 120 .renderArea =
118 .offset = {.x = 0, .y = 0}, 121 {
119 .extent = render_area, 122 .offset = {.x = 0, .y = 0},
120 }, 123 .extent = render_area,
121 .clearValueCount = 0, 124 },
122 .pClearValues = nullptr, 125 .clearValueCount = 0,
123 }; 126 .pClearValues = nullptr,
124 127 };
125 Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) {
126 if (end_renderpass) {
127 cmdbuf.EndRenderPass();
128 }
129 cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); 128 cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
130 }); 129 });
130 num_renderpass_images = framebuffer->NumImages();
131 renderpass_images = framebuffer->Images();
132 renderpass_image_ranges = framebuffer->ImageRanges();
131} 133}
132 134
133void VKScheduler::RequestOutsideRenderPassOperationContext() { 135void VKScheduler::RequestOutsideRenderPassOperationContext() {
@@ -241,8 +243,37 @@ void VKScheduler::EndRenderPass() {
241 if (!state.renderpass) { 243 if (!state.renderpass) {
242 return; 244 return;
243 } 245 }
246 Record([num_images = num_renderpass_images, images = renderpass_images,
247 ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) {
248 std::array<VkImageMemoryBarrier, 9> barriers;
249 for (size_t i = 0; i < num_images; ++i) {
250 barriers[i] = VkImageMemoryBarrier{
251 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
252 .pNext = nullptr,
253 .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
254 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
255 .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
256 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
257 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
258 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
259 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
260 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
261 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
262 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
263 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
264 .image = images[i],
265 .subresourceRange = ranges[i],
266 };
267 }
268 cmdbuf.EndRenderPass();
269 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
270 VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
271 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
272 VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr,
273 vk::Span(barriers.data(), num_images));
274 });
244 state.renderpass = nullptr; 275 state.renderpass = nullptr;
245 Record([](vk::CommandBuffer cmdbuf) { cmdbuf.EndRenderPass(); }); 276 num_renderpass_images = 0;
246} 277}
247 278
248void VKScheduler::AcquireNewChunk() { 279void VKScheduler::AcquireNewChunk() {
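The scheduler hunks above move the VkRenderPassBeginInfo construction inside the recorded lambda. Everything a recorded command touches must be captured by value, because the closure runs later on the worker thread. A reduced model of that pattern, with hypothetical names (the real VKScheduler records into command chunks, not std::function):

#include <functional>
#include <utility>
#include <vector>

struct CommandBuffer {}; // stand-in for vk::CommandBuffer

std::vector<std::function<void(CommandBuffer)>> recorded_commands;

// Queue a command for later execution; captures must be taken by value so the
// closure outlives the caller's stack frame.
template <typename Func>
void Record(Func&& func) {
    recorded_commands.emplace_back(std::forward<Func>(func));
}

// Executed later, on the worker thread, against the real command buffer.
void Replay(CommandBuffer cmdbuf) {
    for (const auto& command : recorded_commands) {
        command(cmdbuf);
    }
    recorded_commands.clear();
}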
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 6d3a5da0b..0a36c8fad 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -17,6 +17,7 @@
17namespace Vulkan { 17namespace Vulkan {
18 18
19class CommandPool; 19class CommandPool;
20class Framebuffer;
20class MasterSemaphore; 21class MasterSemaphore;
21class StateTracker; 22class StateTracker;
22class VKDevice; 23class VKDevice;
@@ -52,8 +53,7 @@ public:
52 void DispatchWork(); 53 void DispatchWork();
53 54
54 /// Requests to begin a renderpass. 55 /// Requests to begin a renderpass.
55 void RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, 56 void RequestRenderpass(const Framebuffer* framebuffer);
56 VkExtent2D render_area);
57 57
58 /// Requests the current execution context to be able to execute operations only allowed outside 58 /// Requests the current execution context to be able to execute operations only allowed outside
59 /// of a renderpass. 59 /// of a renderpass.
@@ -62,6 +62,9 @@ public:
62 /// Binds a pipeline to the current execution context. 62 /// Binds a pipeline to the current execution context.
63 void BindGraphicsPipeline(VkPipeline pipeline); 63 void BindGraphicsPipeline(VkPipeline pipeline);
64 64
65 /// Invalidates current command buffer state except for render passes
66 void InvalidateState();
67
65 /// Assigns the query cache. 68 /// Assigns the query cache.
66 void SetQueryCache(VKQueryCache& query_cache_) { 69 void SetQueryCache(VKQueryCache& query_cache_) {
67 query_cache = &query_cache_; 70 query_cache = &query_cache_;
@@ -170,8 +173,6 @@ private:
170 173
171 void AllocateNewContext(); 174 void AllocateNewContext();
172 175
173 void InvalidateState();
174
175 void EndPendingOperations(); 176 void EndPendingOperations();
176 177
177 void EndRenderPass(); 178 void EndRenderPass();
@@ -192,6 +193,11 @@ private:
192 std::thread worker_thread; 193 std::thread worker_thread;
193 194
194 State state; 195 State state;
196
197 u32 num_renderpass_images = 0;
198 std::array<VkImage, 9> renderpass_images{};
199 std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{};
200
195 Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue; 201 Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;
196 Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; 202 Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
197 std::mutex mutex; 203 std::mutex mutex;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 72954d0e3..09d6f9f35 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -102,7 +102,7 @@ struct GenericVaryingDescription {
102 bool is_scalar = false; 102 bool is_scalar = false;
103}; 103};
104 104
105spv::Dim GetSamplerDim(const Sampler& sampler) { 105spv::Dim GetSamplerDim(const SamplerEntry& sampler) {
106 ASSERT(!sampler.is_buffer); 106 ASSERT(!sampler.is_buffer);
107 switch (sampler.type) { 107 switch (sampler.type) {
108 case Tegra::Shader::TextureType::Texture1D: 108 case Tegra::Shader::TextureType::Texture1D:
@@ -119,7 +119,7 @@ spv::Dim GetSamplerDim(const Sampler& sampler) {
119 } 119 }
120} 120}
121 121
122std::pair<spv::Dim, bool> GetImageDim(const Image& image) { 122std::pair<spv::Dim, bool> GetImageDim(const ImageEntry& image) {
123 switch (image.type) { 123 switch (image.type) {
124 case Tegra::Shader::ImageType::Texture1D: 124 case Tegra::Shader::ImageType::Texture1D:
125 return {spv::Dim::Dim1D, false}; 125 return {spv::Dim::Dim1D, false};
@@ -980,7 +980,7 @@ private:
980 return binding; 980 return binding;
981 } 981 }
982 982
983 void DeclareImage(const Image& image, u32& binding) { 983 void DeclareImage(const ImageEntry& image, u32& binding) {
984 const auto [dim, arrayed] = GetImageDim(image); 984 const auto [dim, arrayed] = GetImageDim(image);
985 constexpr int depth = 0; 985 constexpr int depth = 0;
986 constexpr bool ms = false; 986 constexpr bool ms = false;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index df1812514..ad91ad5de 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -21,10 +21,10 @@ class VKDevice;
21namespace Vulkan { 21namespace Vulkan {
22 22
23using Maxwell = Tegra::Engines::Maxwell3D::Regs; 23using Maxwell = Tegra::Engines::Maxwell3D::Regs;
24using UniformTexelEntry = VideoCommon::Shader::Sampler; 24using UniformTexelEntry = VideoCommon::Shader::SamplerEntry;
25using SamplerEntry = VideoCommon::Shader::Sampler; 25using SamplerEntry = VideoCommon::Shader::SamplerEntry;
26using StorageTexelEntry = VideoCommon::Shader::Image; 26using StorageTexelEntry = VideoCommon::Shader::ImageEntry;
27using ImageEntry = VideoCommon::Shader::Image; 27using ImageEntry = VideoCommon::Shader::ImageEntry;
28 28
29constexpr u32 DESCRIPTOR_SET = 0; 29constexpr u32 DESCRIPTOR_SET = 0;
30 30
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp
index c1a218d76..38a0be7f2 100644
--- a/src/video_core/renderer_vulkan/vk_shader_util.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp
@@ -13,18 +13,13 @@
13 13
14namespace Vulkan { 14namespace Vulkan {
15 15
16vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) { 16vk::ShaderModule BuildShader(const VKDevice& device, std::span<const u32> code) {
17 // Avoid undefined behavior by copying to a staging allocation
18 ASSERT(code_size % sizeof(u32) == 0);
19 const auto data = std::make_unique<u32[]>(code_size / sizeof(u32));
20 std::memcpy(data.get(), code_data, code_size);
21
22 return device.GetLogical().CreateShaderModule({ 17 return device.GetLogical().CreateShaderModule({
23 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, 18 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
24 .pNext = nullptr, 19 .pNext = nullptr,
25 .flags = 0, 20 .flags = 0,
26 .codeSize = code_size, 21 .codeSize = static_cast<u32>(code.size_bytes()),
27 .pCode = data.get(), 22 .pCode = code.data(),
28 }); 23 });
29} 24}
30 25
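Since BuildShader now takes std::span<const u32>, callers can hand it SPIR-V words that are already correctly aligned, which is what made the old staging copy unnecessary. A hedged usage sketch; the file-loading helper and the 'device' object are assumptions, not part of the commit:

#include <cstddef>
#include <cstdint>
#include <fstream>
#include <vector>

using u32 = std::uint32_t; // as in common/common_types.h

std::vector<u32> LoadSpirv(const char* path) {
    std::ifstream file(path, std::ios::binary | std::ios::ate);
    std::vector<u32> code(static_cast<std::size_t>(file.tellg()) / sizeof(u32));
    file.seekg(0);
    file.read(reinterpret_cast<char*>(code.data()),
              static_cast<std::streamsize>(code.size() * sizeof(u32)));
    return code;
}

// vk::ShaderModule module = BuildShader(device, LoadSpirv("shader.spv"));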
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h
index d1d3f3cae..dce34a140 100644
--- a/src/video_core/renderer_vulkan/vk_shader_util.h
+++ b/src/video_core/renderer_vulkan/vk_shader_util.h
@@ -4,6 +4,8 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <span>
8
7#include "common/common_types.h" 9#include "common/common_types.h"
8#include "video_core/renderer_vulkan/wrapper.h" 10#include "video_core/renderer_vulkan/wrapper.h"
9 11
@@ -11,6 +13,6 @@ namespace Vulkan {
11 13
12class VKDevice; 14class VKDevice;
13 15
14vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data); 16vk::ShaderModule BuildShader(const VKDevice& device, std::span<const u32> code);
15 17
16} // namespace Vulkan 18} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 50164cc08..1779a2e30 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <array>
6#include <cstddef> 7#include <cstddef>
7#include <iterator> 8#include <iterator>
8 9
@@ -29,21 +30,15 @@ using Table = Maxwell3D::DirtyState::Table;
29using Flags = Maxwell3D::DirtyState::Flags; 30using Flags = Maxwell3D::DirtyState::Flags;
30 31
31Flags MakeInvalidationFlags() { 32Flags MakeInvalidationFlags() {
33 static constexpr std::array INVALIDATION_FLAGS{
34 Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
35 StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable,
36 DepthCompareOp, FrontFace, StencilOp, StencilTestEnable,
37 };
32 Flags flags{}; 38 Flags flags{};
33 flags[Viewports] = true; 39 for (const int flag : INVALIDATION_FLAGS) {
34 flags[Scissors] = true; 40 flags[flag] = true;
35 flags[DepthBias] = true; 41 }
36 flags[BlendConstants] = true;
37 flags[DepthBounds] = true;
38 flags[StencilProperties] = true;
39 flags[CullMode] = true;
40 flags[DepthBoundsEnable] = true;
41 flags[DepthTestEnable] = true;
42 flags[DepthWriteEnable] = true;
43 flags[DepthCompareOp] = true;
44 flags[FrontFace] = true;
45 flags[StencilOp] = true;
46 flags[StencilTestEnable] = true;
47 return flags; 42 return flags;
48} 43}
49 44
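The state tracker rewrite above swaps fourteen manual flag assignments for a constexpr table and a loop. A self-contained sketch of the idiom with a reduced flag set; the enumerators here are placeholders for the real dirty flags:

#include <array>
#include <bitset>

enum : int { Viewports, Scissors, DepthBias, NumFlags };

std::bitset<NumFlags> MakeFlags() {
    // Adding a new dirty flag is now a one-entry change to this table.
    static constexpr std::array INDICES{Viewports, Scissors, DepthBias};
    std::bitset<NumFlags> flags;
    for (const int index : INDICES) {
        flags[index] = true;
    }
    return flags;
}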
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 1de789e57..c335d2bdf 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -52,6 +52,14 @@ public:
52 current_topology = INVALID_TOPOLOGY; 52 current_topology = INVALID_TOPOLOGY;
53 } 53 }
54 54
55 void InvalidateViewports() {
56 flags[Dirty::Viewports] = true;
57 }
58
59 void InvalidateScissors() {
60 flags[Dirty::Scissors] = true;
61 }
62
55 bool TouchViewports() { 63 bool TouchViewports() {
56 return Exchange(Dirty::Viewports, false); 64 return Exchange(Dirty::Viewports, false);
57 } 65 }
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index 1b59612b9..419cb154d 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -19,6 +19,10 @@ namespace Vulkan {
19 19
20namespace { 20namespace {
21 21
22constexpr VkBufferUsageFlags BUFFER_USAGE =
23 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
24 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
25
22constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; 26constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
23constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; 27constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
24 28
@@ -56,17 +60,16 @@ u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
56 60
57} // Anonymous namespace 61} // Anonymous namespace
58 62
59VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_, 63VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_)
60 VkBufferUsageFlags usage)
61 : device{device_}, scheduler{scheduler_} { 64 : device{device_}, scheduler{scheduler_} {
62 CreateBuffers(usage); 65 CreateBuffers();
63 ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); 66 ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
64 ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); 67 ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
65} 68}
66 69
67VKStreamBuffer::~VKStreamBuffer() = default; 70VKStreamBuffer::~VKStreamBuffer() = default;
68 71
69std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { 72std::pair<u8*, u64> VKStreamBuffer::Map(u64 size, u64 alignment) {
70 ASSERT(size <= stream_buffer_size); 73 ASSERT(size <= stream_buffer_size);
71 mapped_size = size; 74 mapped_size = size;
72 75
@@ -76,7 +79,6 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
76 79
77 WaitPendingOperations(offset); 80 WaitPendingOperations(offset);
78 81
79 bool invalidated = false;
80 if (offset + size > stream_buffer_size) { 82 if (offset + size > stream_buffer_size) {
81 // The buffer would overflow, save the amount of used watches and reset the state. 83 // The buffer would overflow, save the amount of used watches and reset the state.
82 invalidation_mark = current_watch_cursor; 84 invalidation_mark = current_watch_cursor;
@@ -90,11 +92,9 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
90 92
91 // Ensure that we don't wait for uncommitted fences. 93 // Ensure that we don't wait for uncommitted fences.
92 scheduler.Flush(); 94 scheduler.Flush();
93
94 invalidated = true;
95 } 95 }
96 96
97 return {memory.Map(offset, size), offset, invalidated}; 97 return std::make_pair(memory.Map(offset, size), offset);
98} 98}
99 99
100void VKStreamBuffer::Unmap(u64 size) { 100void VKStreamBuffer::Unmap(u64 size) {
@@ -113,7 +113,7 @@ void VKStreamBuffer::Unmap(u64 size) {
113 watch.tick = scheduler.CurrentTick(); 113 watch.tick = scheduler.CurrentTick();
114} 114}
115 115
116void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { 116void VKStreamBuffer::CreateBuffers() {
117 const auto memory_properties = device.GetPhysical().GetMemoryProperties(); 117 const auto memory_properties = device.GetPhysical().GetMemoryProperties();
118 const u32 preferred_type = GetMemoryType(memory_properties); 118 const u32 preferred_type = GetMemoryType(memory_properties);
119 const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex; 119 const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex;
@@ -127,7 +127,7 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) {
127 .pNext = nullptr, 127 .pNext = nullptr,
128 .flags = 0, 128 .flags = 0,
129 .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size), 129 .size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size),
130 .usage = usage, 130 .usage = BUFFER_USAGE,
131 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 131 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
132 .queueFamilyIndexCount = 0, 132 .queueFamilyIndexCount = 0,
133 .pQueueFamilyIndices = nullptr, 133 .pQueueFamilyIndices = nullptr,
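With the invalidation flag gone, VKStreamBuffer::Map now returns just the mapped pointer and the offset. A hypothetical caller of the new pair-based API; the Upload helper and the u8/u64 aliases from common/common_types.h are assumptions for the sketch:

#include <cstring>
#include <vector>

u64 Upload(VKStreamBuffer& stream_buffer, const std::vector<u8>& data) {
    const auto [pointer, offset] = stream_buffer.Map(data.size(), 4);
    std::memcpy(pointer, data.data(), data.size());
    stream_buffer.Unmap(data.size()); // bytes actually written
    return offset;                    // bind the stream buffer at this offset for the draw
}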
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index 5e15ad78f..1428f77bf 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -5,7 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <optional> 7#include <optional>
8#include <tuple> 8#include <utility>
9#include <vector> 9#include <vector>
10 10
11#include "common/common_types.h" 11#include "common/common_types.h"
@@ -19,17 +19,15 @@ class VKScheduler;
19 19
20class VKStreamBuffer final { 20class VKStreamBuffer final {
21public: 21public:
22 explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, 22 explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler);
23 VkBufferUsageFlags usage);
24 ~VKStreamBuffer(); 23 ~VKStreamBuffer();
25 24
26 /** 25 /**
27 * Reserves a region of memory from the stream buffer. 26 * Reserves a region of memory from the stream buffer.
28 * @param size Size to reserve. 27 * @param size Size to reserve.
29 * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer 28 * @returns A pair of a raw memory pointer (with offset added) and the buffer offset
30 * offset and a boolean that's true when buffer has been invalidated.
31 */ 29 */
32 std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment); 30 std::pair<u8*, u64> Map(u64 size, u64 alignment);
33 31
34 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. 32 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
35 void Unmap(u64 size); 33 void Unmap(u64 size);
@@ -49,7 +47,7 @@ private:
49 }; 47 };
50 48
51 /// Creates Vulkan buffer handles committing the required memory. 49 /// Creates Vulkan buffer handles committing the required memory.
52 void CreateBuffers(VkBufferUsageFlags usage); 50 void CreateBuffers();
53 51
54 /// Increases the number of watches available. 52 /// Increases the number of watches available.
55 void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size); 53 void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index ae2e3322c..261808391 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -4,614 +4,1103 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <array>
7#include <cstddef> 7#include <span>
8#include <cstring>
9#include <memory>
10#include <variant>
11#include <vector> 8#include <vector>
12 9
13#include "common/assert.h" 10#include "video_core/engines/fermi_2d.h"
14#include "common/common_types.h" 11#include "video_core/renderer_vulkan/blit_image.h"
15#include "core/core.h"
16#include "video_core/engines/maxwell_3d.h"
17#include "video_core/morton.h"
18#include "video_core/renderer_vulkan/maxwell_to_vk.h" 12#include "video_core/renderer_vulkan/maxwell_to_vk.h"
19#include "video_core/renderer_vulkan/vk_device.h" 13#include "video_core/renderer_vulkan/vk_device.h"
20#include "video_core/renderer_vulkan/vk_memory_manager.h"
21#include "video_core/renderer_vulkan/vk_rasterizer.h"
22#include "video_core/renderer_vulkan/vk_scheduler.h" 14#include "video_core/renderer_vulkan/vk_scheduler.h"
23#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 15#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
24#include "video_core/renderer_vulkan/vk_texture_cache.h" 16#include "video_core/renderer_vulkan/vk_texture_cache.h"
25#include "video_core/renderer_vulkan/wrapper.h" 17#include "video_core/renderer_vulkan/wrapper.h"
26#include "video_core/surface.h"
27 18
28namespace Vulkan { 19namespace Vulkan {
29 20
30using VideoCore::MortonSwizzle; 21using Tegra::Engines::Fermi2D;
31using VideoCore::MortonSwizzleMode;
32
33using Tegra::Texture::SwizzleSource; 22using Tegra::Texture::SwizzleSource;
34using VideoCore::Surface::PixelFormat; 23using Tegra::Texture::TextureMipmapFilter;
35using VideoCore::Surface::SurfaceTarget; 24using VideoCommon::BufferImageCopy;
25using VideoCommon::ImageInfo;
26using VideoCommon::ImageType;
27using VideoCommon::SubresourceRange;
28using VideoCore::Surface::IsPixelFormatASTC;
36 29
37namespace { 30namespace {
38 31
39VkImageType SurfaceTargetToImage(SurfaceTarget target) { 32constexpr std::array ATTACHMENT_REFERENCES{
40 switch (target) { 33 VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL},
41 case SurfaceTarget::Texture1D: 34 VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL},
42 case SurfaceTarget::Texture1DArray: 35 VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL},
36 VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL},
37 VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL},
38 VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL},
39 VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL},
40 VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL},
41 VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL},
42};
43
44constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
45 if (color == std::array<float, 4>{0, 0, 0, 0}) {
46 return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
47 } else if (color == std::array<float, 4>{0, 0, 0, 1}) {
48 return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
49 } else if (color == std::array<float, 4>{1, 1, 1, 1}) {
50 return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
51 }
52 if (color[0] + color[1] + color[2] > 1.35f) {
53 // If the color channels average brighter than roughly 0.5, use a white border
54 return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
55 } else if (color[3] > 0.5f) {
56 return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
57 } else {
58 return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
59 }
60}
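Since ConvertBorderColor is constexpr, both the exact-match table and the brightness heuristic can be sanity-checked at compile time; a small sketch:

    static_assert(ConvertBorderColor({0.0f, 0.0f, 0.0f, 1.0f}) ==
                  VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK);
    // RGB sums to 2.7 > 1.35, so the heuristic picks an opaque white border.
    static_assert(ConvertBorderColor({0.9f, 0.9f, 0.9f, 0.0f}) ==
                  VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE);
    // Dim but mostly opaque colors fall back to opaque black.
    static_assert(ConvertBorderColor({0.1f, 0.1f, 0.1f, 0.8f}) ==
                  VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK);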
61
62[[nodiscard]] VkImageType ConvertImageType(const ImageType type) {
63 switch (type) {
64 case ImageType::e1D:
43 return VK_IMAGE_TYPE_1D; 65 return VK_IMAGE_TYPE_1D;
44 case SurfaceTarget::Texture2D: 66 case ImageType::e2D:
45 case SurfaceTarget::Texture2DArray: 67 case ImageType::Linear:
46 case SurfaceTarget::TextureCubemap:
47 case SurfaceTarget::TextureCubeArray:
48 return VK_IMAGE_TYPE_2D; 68 return VK_IMAGE_TYPE_2D;
49 case SurfaceTarget::Texture3D: 69 case ImageType::e3D:
50 return VK_IMAGE_TYPE_3D; 70 return VK_IMAGE_TYPE_3D;
51 case SurfaceTarget::TextureBuffer: 71 case ImageType::Buffer:
52 UNREACHABLE(); 72 break;
53 return {};
54 } 73 }
55 UNREACHABLE_MSG("Unknown texture target={}", target); 74 UNREACHABLE_MSG("Invalid image type={}", type);
56 return {}; 75 return {};
57} 76}
58 77
59VkImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) { 78[[nodiscard]] VkSampleCountFlagBits ConvertSampleCount(u32 num_samples) {
60 if (pixel_format < PixelFormat::MaxColorFormat) { 79 switch (num_samples) {
61 return VK_IMAGE_ASPECT_COLOR_BIT; 80 case 1:
62 } else if (pixel_format < PixelFormat::MaxDepthFormat) { 81 return VK_SAMPLE_COUNT_1_BIT;
63 return VK_IMAGE_ASPECT_DEPTH_BIT; 82 case 2:
64 } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) { 83 return VK_SAMPLE_COUNT_2_BIT;
65 return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; 84 case 4:
66 } else { 85 return VK_SAMPLE_COUNT_4_BIT;
67 UNREACHABLE_MSG("Invalid pixel format={}", pixel_format); 86 case 8:
68 return VK_IMAGE_ASPECT_COLOR_BIT; 87 return VK_SAMPLE_COUNT_8_BIT;
88 case 16:
89 return VK_SAMPLE_COUNT_16_BIT;
90 default:
91 UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
92 return VK_SAMPLE_COUNT_1_BIT;
69 } 93 }
70} 94}
71 95
72VkImageViewType GetImageViewType(SurfaceTarget target) { 96[[nodiscard]] VkImageCreateInfo MakeImageCreateInfo(const VKDevice& device, const ImageInfo& info) {
73 switch (target) { 97 const auto format_info = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, info.format);
74 case SurfaceTarget::Texture1D: 98 VkImageCreateFlags flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
75 return VK_IMAGE_VIEW_TYPE_1D; 99 if (info.type == ImageType::e2D && info.resources.layers >= 6 &&
76 case SurfaceTarget::Texture2D: 100 info.size.width == info.size.height) {
77 return VK_IMAGE_VIEW_TYPE_2D; 101 flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
78 case SurfaceTarget::Texture3D:
79 return VK_IMAGE_VIEW_TYPE_3D;
80 case SurfaceTarget::Texture1DArray:
81 return VK_IMAGE_VIEW_TYPE_1D_ARRAY;
82 case SurfaceTarget::Texture2DArray:
83 return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
84 case SurfaceTarget::TextureCubemap:
85 return VK_IMAGE_VIEW_TYPE_CUBE;
86 case SurfaceTarget::TextureCubeArray:
87 return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
88 case SurfaceTarget::TextureBuffer:
89 break;
90 } 102 }
91 UNREACHABLE(); 103 if (info.type == ImageType::e3D) {
92 return {}; 104 flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
93} 105 }
94 106 VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
95vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, 107 VK_IMAGE_USAGE_SAMPLED_BIT;
96 std::size_t host_memory_size) { 108 if (format_info.attachable) {
97 // TODO(Rodrigo): Move texture buffer creation to the buffer cache 109 switch (VideoCore::Surface::GetFormatType(info.format)) {
98 return device.GetLogical().CreateBuffer({ 110 case VideoCore::Surface::SurfaceType::ColorTexture:
99 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 111 usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
112 break;
113 case VideoCore::Surface::SurfaceType::Depth:
114 case VideoCore::Surface::SurfaceType::DepthStencil:
115 usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
116 break;
117 default:
118 UNREACHABLE_MSG("Invalid surface type");
119 }
120 }
121 if (format_info.storage) {
122 usage |= VK_IMAGE_USAGE_STORAGE_BIT;
123 }
124 const auto [samples_x, samples_y] = VideoCommon::SamplesLog2(info.num_samples);
125 return VkImageCreateInfo{
126 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
100 .pNext = nullptr, 127 .pNext = nullptr,
101 .flags = 0, 128 .flags = flags,
102 .size = static_cast<VkDeviceSize>(host_memory_size), 129 .imageType = ConvertImageType(info.type),
103 .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | 130 .format = format_info.format,
104 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | 131 .extent =
105 VK_BUFFER_USAGE_TRANSFER_DST_BIT, 132 {
133 .width = info.size.width >> samples_x,
134 .height = info.size.height >> samples_y,
135 .depth = info.size.depth,
136 },
137 .mipLevels = static_cast<u32>(info.resources.levels),
138 .arrayLayers = static_cast<u32>(info.resources.layers),
139 .samples = ConvertSampleCount(info.num_samples),
140 .tiling = VK_IMAGE_TILING_OPTIMAL,
141 .usage = usage,
106 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 142 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
107 .queueFamilyIndexCount = 0, 143 .queueFamilyIndexCount = 0,
108 .pQueueFamilyIndices = nullptr, 144 .pQueueFamilyIndices = nullptr,
109 }); 145 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
110}
111
112VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device,
113 const SurfaceParams& params, VkBuffer buffer,
114 std::size_t host_memory_size) {
115 ASSERT(params.IsBuffer());
116
117 return {
118 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
119 .pNext = nullptr,
120 .flags = 0,
121 .buffer = buffer,
122 .format =
123 MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format,
124 .offset = 0,
125 .range = static_cast<VkDeviceSize>(host_memory_size),
126 }; 146 };
127} 147}
128 148
129VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { 149[[nodiscard]] vk::Image MakeImage(const VKDevice& device, const ImageInfo& info) {
130 ASSERT(!params.IsBuffer()); 150 if (info.type == ImageType::Buffer) {
131 151 return vk::Image{};
132 const auto [format, attachable, storage] = 152 }
133 MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format); 153 return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info));
154}
134 155
135 VkImageCreateInfo ci{ 156[[nodiscard]] vk::Buffer MakeBuffer(const VKDevice& device, const ImageInfo& info) {
136 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, 157 if (info.type != ImageType::Buffer) {
158 return vk::Buffer{};
159 }
160 const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format);
161 return device.GetLogical().CreateBuffer(VkBufferCreateInfo{
162 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
137 .pNext = nullptr, 163 .pNext = nullptr,
138 .flags = 0, 164 .flags = 0,
139 .imageType = SurfaceTargetToImage(params.target), 165 .size = info.size.width * bytes_per_block,
140 .format = format, 166 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
141 .extent = {}, 167 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
142 .mipLevels = params.num_levels, 168 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
143 .arrayLayers = static_cast<u32>(params.GetNumLayers()),
144 .samples = VK_SAMPLE_COUNT_1_BIT,
145 .tiling = VK_IMAGE_TILING_OPTIMAL,
146 .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
147 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
148 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 169 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
149 .queueFamilyIndexCount = 0, 170 .queueFamilyIndexCount = 0,
150 .pQueueFamilyIndices = nullptr, 171 .pQueueFamilyIndices = nullptr,
151 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, 172 });
152 };
153 if (attachable) {
154 ci.usage |= params.IsPixelFormatZeta() ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT
155 : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
156 }
157 if (storage) {
158 ci.usage |= VK_IMAGE_USAGE_STORAGE_BIT;
159 }
160
161 switch (params.target) {
162 case SurfaceTarget::TextureCubemap:
163 case SurfaceTarget::TextureCubeArray:
164 ci.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
165 [[fallthrough]];
166 case SurfaceTarget::Texture1D:
167 case SurfaceTarget::Texture1DArray:
168 case SurfaceTarget::Texture2D:
169 case SurfaceTarget::Texture2DArray:
170 ci.extent = {params.width, params.height, 1};
171 break;
172 case SurfaceTarget::Texture3D:
173 ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
174 ci.extent = {params.width, params.height, params.depth};
175 break;
176 case SurfaceTarget::TextureBuffer:
177 UNREACHABLE();
178 }
179
180 return ci;
181} 173}
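Note how MakeImageCreateInfo stores multisampled sizes pre-divided: the guest dimensions are shifted right by the per-axis sample log2 and the samples field carries the MSAA factor instead. A hypothetical stand-in for VideoCommon::SamplesLog2 illustrating the arithmetic (the real table lives in the common texture cache and may differ for 8x/16x):

    constexpr std::pair<u32, u32> SamplesLog2Sketch(u32 num_samples) {
        switch (num_samples) {
        case 1:  return {0, 0};
        case 2:  return {1, 0}; // assumption: 2x splits the X axis first
        case 4:  return {1, 1};
        case 8:  return {2, 1};
        case 16: return {2, 2};
        }
        return {0, 0};
    }
    // e.g. a 1280x720 4x MSAA guest surface is created as a 640x360 VkImage.
    static_assert(SamplesLog2Sketch(4) == std::pair<u32, u32>{1, 1});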
182 174
183u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, 175[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) {
184 SwizzleSource w_source) { 176 switch (VideoCore::Surface::GetFormatType(format)) {
185 return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | 177 case VideoCore::Surface::SurfaceType::ColorTexture:
186 (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); 178 return VK_IMAGE_ASPECT_COLOR_BIT;
179 case VideoCore::Surface::SurfaceType::Depth:
180 return VK_IMAGE_ASPECT_DEPTH_BIT;
181 case VideoCore::Surface::SurfaceType::DepthStencil:
182 return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
183 default:
184 UNREACHABLE_MSG("Invalid surface type");
185 return VkImageAspectFlags{};
186 }
187} 187}
188 188
189} // Anonymous namespace 189[[nodiscard]] VkImageAspectFlags ImageViewAspectMask(const VideoCommon::ImageViewInfo& info) {
190 190 if (info.IsRenderTarget()) {
191CachedSurface::CachedSurface(const VKDevice& device_, VKMemoryManager& memory_manager_, 191 return ImageAspectMask(info.format);
192 VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_,
193 GPUVAddr gpu_addr_, const SurfaceParams& params_)
194 : SurfaceBase<View>{gpu_addr_, params_, device_.IsOptimalAstcSupported()}, device{device_},
195 memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{staging_pool_} {
196 if (params.IsBuffer()) {
197 buffer = CreateBuffer(device, params, host_memory_size);
198 commit = memory_manager.Commit(buffer, false);
199
200 const auto buffer_view_ci =
201 GenerateBufferViewCreateInfo(device, params, *buffer, host_memory_size);
202 format = buffer_view_ci.format;
203
204 buffer_view = device.GetLogical().CreateBufferView(buffer_view_ci);
205 } else {
206 const auto image_ci = GenerateImageCreateInfo(device, params);
207 format = image_ci.format;
208
209 image.emplace(device, scheduler, image_ci, PixelFormatToImageAspect(params.pixel_format));
210 commit = memory_manager.Commit(image->GetHandle(), false);
211 } 192 }
212 193 const bool is_first = info.Swizzle()[0] == SwizzleSource::R;
213 // TODO(Rodrigo): Move this to a virtual function. 194 switch (info.format) {
214 u32 num_layers = 1; 195 case PixelFormat::D24_UNORM_S8_UINT:
215 if (params.is_layered || params.target == SurfaceTarget::Texture3D) { 196 case PixelFormat::D32_FLOAT_S8_UINT:
216 num_layers = params.depth; 197 return is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT;
198 case PixelFormat::S8_UINT_D24_UNORM:
199 return is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
200 case PixelFormat::D16_UNORM:
201 case PixelFormat::D32_FLOAT:
202 return VK_IMAGE_ASPECT_DEPTH_BIT;
203 default:
204 return VK_IMAGE_ASPECT_COLOR_BIT;
217 } 205 }
218 main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels));
219} 206}
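For packed depth-stencil formats, the aspect a view samples follows its first swizzle source: R reads depth and G reads stencil for D24/D32+S8, while S8_UINT_D24_UNORM flips the rule because its components are stored swapped. A compile-time sketch of the unswapped case (DepthStencilAspectSketch is a hypothetical mirror, not part of the patch):

    constexpr VkImageAspectFlags DepthStencilAspectSketch(SwizzleSource first) {
        return first == SwizzleSource::R ? VK_IMAGE_ASPECT_DEPTH_BIT
                                         : VK_IMAGE_ASPECT_STENCIL_BIT;
    }
    static_assert(DepthStencilAspectSketch(SwizzleSource::R) == VK_IMAGE_ASPECT_DEPTH_BIT);
    static_assert(DepthStencilAspectSketch(SwizzleSource::G) == VK_IMAGE_ASPECT_STENCIL_BIT);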
220 207
221CachedSurface::~CachedSurface() = default; 208[[nodiscard]] VkAttachmentDescription AttachmentDescription(const VKDevice& device,
222 209 const ImageView* image_view) {
223void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { 210 const auto pixel_format = image_view->format;
224 // To upload data we have to be outside of a renderpass 211 return VkAttachmentDescription{
225 scheduler.RequestOutsideRenderPassOperationContext(); 212 .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
213 .format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format).format,
214 .samples = image_view->Samples(),
215 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
216 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
217 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
218 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
219 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
220 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
221 };
222}
226 223
227 if (params.IsBuffer()) { 224[[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) {
228 UploadBuffer(staging_buffer); 225 switch (swizzle) {
229 } else { 226 case SwizzleSource::Zero:
230 UploadImage(staging_buffer); 227 return VK_COMPONENT_SWIZZLE_ZERO;
228 case SwizzleSource::R:
229 return VK_COMPONENT_SWIZZLE_R;
230 case SwizzleSource::G:
231 return VK_COMPONENT_SWIZZLE_G;
232 case SwizzleSource::B:
233 return VK_COMPONENT_SWIZZLE_B;
234 case SwizzleSource::A:
235 return VK_COMPONENT_SWIZZLE_A;
236 case SwizzleSource::OneFloat:
237 case SwizzleSource::OneInt:
238 return VK_COMPONENT_SWIZZLE_ONE;
231 } 239 }
240 UNREACHABLE_MSG("Invalid swizzle={}", swizzle);
241 return VK_COMPONENT_SWIZZLE_ZERO;
232} 242}
233 243
234void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { 244[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) {
235 UNIMPLEMENTED_IF(params.IsBuffer()); 245 switch (type) {
236 246 case VideoCommon::ImageViewType::e1D:
237 if (params.pixel_format == PixelFormat::A1B5G5R5_UNORM) { 247 return VK_IMAGE_VIEW_TYPE_1D;
238 LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed"); 248 case VideoCommon::ImageViewType::e2D:
249 return VK_IMAGE_VIEW_TYPE_2D;
250 case VideoCommon::ImageViewType::Cube:
251 return VK_IMAGE_VIEW_TYPE_CUBE;
252 case VideoCommon::ImageViewType::e3D:
253 return VK_IMAGE_VIEW_TYPE_3D;
254 case VideoCommon::ImageViewType::e1DArray:
255 return VK_IMAGE_VIEW_TYPE_1D_ARRAY;
256 case VideoCommon::ImageViewType::e2DArray:
257 return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
258 case VideoCommon::ImageViewType::CubeArray:
259 return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
260 case VideoCommon::ImageViewType::Rect:
261 LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported");
262 return VK_IMAGE_VIEW_TYPE_2D;
263 case VideoCommon::ImageViewType::Buffer:
264 UNREACHABLE_MSG("Texture buffers can't be image views");
265 return VK_IMAGE_VIEW_TYPE_1D;
239 } 266 }
267 UNREACHABLE_MSG("Invalid image view type={}", type);
268 return VK_IMAGE_VIEW_TYPE_2D;
269}
240 270
241 // We can't copy images to buffers inside a renderpass 271[[nodiscard]] VkImageSubresourceLayers MakeImageSubresourceLayers(
242 scheduler.RequestOutsideRenderPassOperationContext(); 272 VideoCommon::SubresourceLayers subresource, VkImageAspectFlags aspect_mask) {
273 return VkImageSubresourceLayers{
274 .aspectMask = aspect_mask,
275 .mipLevel = static_cast<u32>(subresource.base_level),
276 .baseArrayLayer = static_cast<u32>(subresource.base_layer),
277 .layerCount = static_cast<u32>(subresource.num_layers),
278 };
279}
243 280
244 FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, 281[[nodiscard]] VkOffset3D MakeOffset3D(VideoCommon::Offset3D offset3d) {
245 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); 282 return VkOffset3D{
283 .x = offset3d.x,
284 .y = offset3d.y,
285 .z = offset3d.z,
286 };
287}
246 288
247 const auto& unused_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); 289[[nodiscard]] VkExtent3D MakeExtent3D(VideoCommon::Extent3D extent3d) {
248 // TODO(Rodrigo): Do this in a single copy 290 return VkExtent3D{
249 for (u32 level = 0; level < params.num_levels; ++level) { 291 .width = static_cast<u32>(extent3d.width),
250 scheduler.Record([image = *image->GetHandle(), buffer = *unused_buffer.handle, 292 .height = static_cast<u32>(extent3d.height),
251 copy = GetBufferImageCopy(level)](vk::CommandBuffer cmdbuf) { 293 .depth = static_cast<u32>(extent3d.depth),
252 cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, copy); 294 };
253 }); 295}
254 }
255 scheduler.Finish();
256 296
257 // TODO(Rodrigo): Use an intern buffer for staging buffers and avoid this unnecessary memcpy. 297[[nodiscard]] VkImageCopy MakeImageCopy(const VideoCommon::ImageCopy& copy,
258 std::memcpy(staging_buffer.data(), unused_buffer.commit->Map(host_memory_size), 298 VkImageAspectFlags aspect_mask) noexcept {
259 host_memory_size); 299 return VkImageCopy{
300 .srcSubresource = MakeImageSubresourceLayers(copy.src_subresource, aspect_mask),
301 .srcOffset = MakeOffset3D(copy.src_offset),
302 .dstSubresource = MakeImageSubresourceLayers(copy.dst_subresource, aspect_mask),
303 .dstOffset = MakeOffset3D(copy.dst_offset),
304 .extent = MakeExtent3D(copy.extent),
305 };
260} 306}
261 307
262void CachedSurface::DecorateSurfaceName() { 308[[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
263 // TODO(Rodrigo): Add name decorations 309 std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
310 std::vector<VkBufferCopy> result(copies.size());
311 std::ranges::transform(
312 copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) {
313 return VkBufferCopy{
314 .srcOffset = static_cast<VkDeviceSize>(copy.src_offset + buffer_offset),
315 .dstOffset = static_cast<VkDeviceSize>(copy.dst_offset),
316 .size = static_cast<VkDeviceSize>(copy.size),
317 };
318 });
319 return result;
264} 320}
265 321
266View CachedSurface::CreateView(const ViewParams& view_params) { 322[[nodiscard]] std::vector<VkBufferImageCopy> TransformBufferImageCopies(
267 // TODO(Rodrigo): Add name decorations 323 std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) {
268 return views[view_params] = std::make_shared<CachedSurfaceView>(device, *this, view_params); 324 struct Maker {
325 VkBufferImageCopy operator()(const BufferImageCopy& copy) const {
326 return VkBufferImageCopy{
327 .bufferOffset = copy.buffer_offset + buffer_offset,
328 .bufferRowLength = copy.buffer_row_length,
329 .bufferImageHeight = copy.buffer_image_height,
330 .imageSubresource =
331 {
332 .aspectMask = aspect_mask,
333 .mipLevel = static_cast<u32>(copy.image_subresource.base_level),
334 .baseArrayLayer = static_cast<u32>(copy.image_subresource.base_layer),
335 .layerCount = static_cast<u32>(copy.image_subresource.num_layers),
336 },
337 .imageOffset =
338 {
339 .x = copy.image_offset.x,
340 .y = copy.image_offset.y,
341 .z = copy.image_offset.z,
342 },
343 .imageExtent =
344 {
345 .width = copy.image_extent.width,
346 .height = copy.image_extent.height,
347 .depth = copy.image_extent.depth,
348 },
349 };
350 }
351 size_t buffer_offset;
352 VkImageAspectFlags aspect_mask;
353 };
354 if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
355 std::vector<VkBufferImageCopy> result(copies.size() * 2);
356 std::ranges::transform(copies, result.begin(),
357 Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT});
358 std::ranges::transform(copies, result.begin() + copies.size(),
359 Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT});
360 return result;
361 } else {
362 std::vector<VkBufferImageCopy> result(copies.size());
363 std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask});
364 return result;
365 }
269} 366}
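Vulkan does not allow copying both aspects of a packed depth-stencil image with a single VkBufferImageCopy, so the helper above emits all depth copies first and all stencil copies second, doubling the output. A usage sketch restating that contract (assuming a non-empty span of copies):

    const auto vk_copies = TransformBufferImageCopies(
        copies, buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
    ASSERT(vk_copies.size() == 2 * copies.size());
    ASSERT(vk_copies.front().imageSubresource.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT);
    ASSERT(vk_copies.back().imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT);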
270 367
271void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) { 368[[nodiscard]] VkImageSubresourceRange MakeSubresourceRange(VkImageAspectFlags aspect_mask,
272 const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); 369 const SubresourceRange& range) {
273 std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); 370 return VkImageSubresourceRange{
371 .aspectMask = aspect_mask,
372 .baseMipLevel = static_cast<u32>(range.base.level),
373 .levelCount = static_cast<u32>(range.extent.levels),
374 .baseArrayLayer = static_cast<u32>(range.base.layer),
375 .layerCount = static_cast<u32>(range.extent.layers),
376 };
377}
274 378
275 scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer, 379[[nodiscard]] VkImageSubresourceRange MakeSubresourceRange(const ImageView* image_view) {
276 size = host_memory_size](vk::CommandBuffer cmdbuf) { 380 SubresourceRange range = image_view->range;
277 VkBufferCopy copy; 381 if (True(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) {
278 copy.srcOffset = 0; 382 // Slice image views always affect a single layer, but their subresource range corresponds
279 copy.dstOffset = 0; 383 // to the slice. Override the value to affect a single layer.
280 copy.size = size; 384 range.base.layer = 0;
281 cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); 385 range.extent.layers = 1;
386 }
387 return MakeSubresourceRange(ImageAspectMask(image_view->format), range);
388}
282 389
283 VkBufferMemoryBarrier barrier; 390[[nodiscard]] VkImageSubresourceLayers MakeSubresourceLayers(const ImageView* image_view) {
284 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; 391 return VkImageSubresourceLayers{
285 barrier.pNext = nullptr; 392 .aspectMask = ImageAspectMask(image_view->format),
286 barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; 393 .mipLevel = static_cast<u32>(image_view->range.base.level),
287 barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; 394 .baseArrayLayer = static_cast<u32>(image_view->range.base.layer),
288 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; // They'll be ignored anyway 395 .layerCount = static_cast<u32>(image_view->range.extent.layers),
289 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 396 };
290 barrier.buffer = dst_buffer;
291 barrier.offset = 0;
292 barrier.size = size;
293 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
294 0, {}, barrier, {});
295 });
296} 397}
297 398
298void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) { 399[[nodiscard]] constexpr SwizzleSource ConvertGreenRed(SwizzleSource value) {
299 const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); 400 switch (value) {
300 std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); 401 case SwizzleSource::G:
301 402 return SwizzleSource::R;
302 FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, 403 default:
303 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); 404 return value;
304
305 for (u32 level = 0; level < params.num_levels; ++level) {
306 const VkBufferImageCopy copy = GetBufferImageCopy(level);
307 if (image->GetAspectMask() == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
308 scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(),
309 copy](vk::CommandBuffer cmdbuf) {
310 std::array<VkBufferImageCopy, 2> copies = {copy, copy};
311 copies[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
312 copies[1].imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
313 cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
314 copies);
315 });
316 } else {
317 scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(),
318 copy](vk::CommandBuffer cmdbuf) {
319 cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy);
320 });
321 }
322 } 405 }
323} 406}
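ConvertGreenRed rewrites G to R and leaves every other source untouched; together with the transform in the ImageView constructor further down, it ensures depth-stencil swizzles always sample the first component. Being constexpr, the mapping checks at compile time:

    static_assert(ConvertGreenRed(SwizzleSource::G) == SwizzleSource::R);
    static_assert(ConvertGreenRed(SwizzleSource::R) == SwizzleSource::R);
    static_assert(ConvertGreenRed(SwizzleSource::B) == SwizzleSource::B); // pass-through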
324 407
325VkBufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { 408void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
326 return { 409 VkImageAspectFlags aspect_mask, bool is_initialized,
327 .bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted), 410 std::span<const VkBufferImageCopy> copies) {
328 .bufferRowLength = 0, 411 static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT |
329 .bufferImageHeight = 0, 412 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
330 .imageSubresource = 413 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
414 const VkImageMemoryBarrier read_barrier{
415 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
416 .pNext = nullptr,
417 .srcAccessMask = ACCESS_FLAGS,
418 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
419 .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
420 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
421 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
422 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
423 .image = image,
424 .subresourceRange =
331 { 425 {
332 .aspectMask = image->GetAspectMask(), 426 .aspectMask = aspect_mask,
333 .mipLevel = level, 427 .baseMipLevel = 0,
428 .levelCount = VK_REMAINING_MIP_LEVELS,
334 .baseArrayLayer = 0, 429 .baseArrayLayer = 0,
335 .layerCount = static_cast<u32>(params.GetNumLayers()), 430 .layerCount = VK_REMAINING_ARRAY_LAYERS,
336 }, 431 },
337 .imageOffset = {.x = 0, .y = 0, .z = 0}, 432 };
338 .imageExtent = 433 const VkImageMemoryBarrier write_barrier{
434 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
435 .pNext = nullptr,
436 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
437 .dstAccessMask = ACCESS_FLAGS,
438 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
439 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
440 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
441 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
442 .image = image,
443 .subresourceRange =
339 { 444 {
340 .width = params.GetMipWidth(level), 445 .aspectMask = aspect_mask,
341 .height = params.GetMipHeight(level), 446 .baseMipLevel = 0,
342 .depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1U, 447 .levelCount = VK_REMAINING_MIP_LEVELS,
448 .baseArrayLayer = 0,
449 .layerCount = VK_REMAINING_ARRAY_LAYERS,
343 }, 450 },
344 }; 451 };
452 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
453 read_barrier);
454 cmdbuf.CopyBufferToImage(src_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copies);
455 // TODO: Move this to another API
456 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0,
457 write_barrier);
345} 458}
346 459
347VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { 460[[nodiscard]] VkImageBlit MakeImageBlit(const std::array<Offset2D, 2>& dst_region,
348 return {image->GetAspectMask(), 0, params.num_levels, 0, 461 const std::array<Offset2D, 2>& src_region,
349 static_cast<u32>(params.GetNumLayers())}; 462 const VkImageSubresourceLayers& dst_layers,
463 const VkImageSubresourceLayers& src_layers) {
464 return VkImageBlit{
465 .srcSubresource = src_layers,
466 .srcOffsets =
467 {
468 {
469 .x = src_region[0].x,
470 .y = src_region[0].y,
471 .z = 0,
472 },
473 {
474 .x = src_region[1].x,
475 .y = src_region[1].y,
476 .z = 1,
477 },
478 },
479 .dstSubresource = dst_layers,
480 .dstOffsets =
481 {
482 {
483 .x = dst_region[0].x,
484 .y = dst_region[0].y,
485 .z = 0,
486 },
487 {
488 .x = dst_region[1].x,
489 .y = dst_region[1].y,
490 .z = 1,
491 },
492 },
493 };
350} 494}
351 495
352CachedSurfaceView::CachedSurfaceView(const VKDevice& device_, CachedSurface& surface_, 496[[nodiscard]] VkImageResolve MakeImageResolve(const std::array<Offset2D, 2>& dst_region,
353 const ViewParams& view_params_) 497 const std::array<Offset2D, 2>& src_region,
354 : ViewBase{view_params_}, surface_params{surface_.GetSurfaceParams()}, 498 const VkImageSubresourceLayers& dst_layers,
355 image{surface_.GetImageHandle()}, buffer_view{surface_.GetBufferViewHandle()}, 499 const VkImageSubresourceLayers& src_layers) {
356 aspect_mask{surface_.GetAspectMask()}, device{device_}, surface{surface_}, 500 return VkImageResolve{
357 base_level{view_params_.base_level}, num_levels{view_params_.num_levels}, 501 .srcSubresource = src_layers,
358 image_view_type{image ? GetImageViewType(view_params_.target) : VK_IMAGE_VIEW_TYPE_1D} { 502 .srcOffset =
359 if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { 503 {
360 base_layer = 0; 504 .x = src_region[0].x,
361 num_layers = 1; 505 .y = src_region[0].y,
362 base_slice = view_params_.base_layer; 506 .z = 0,
363 num_slices = view_params_.num_layers; 507 },
364 } else { 508 .dstSubresource = dst_layers,
365 base_layer = view_params_.base_layer; 509 .dstOffset =
366 num_layers = view_params_.num_layers; 510 {
367 } 511 .x = dst_region[0].x,
512 .y = dst_region[0].y,
513 .z = 0,
514 },
515 .extent =
516 {
517 .width = static_cast<u32>(dst_region[1].x - dst_region[0].x),
518 .height = static_cast<u32>(dst_region[1].y - dst_region[0].y),
519 .depth = 1,
520 },
521 };
368} 522}
369 523
370CachedSurfaceView::~CachedSurfaceView() = default; 524struct RangedBarrierRange {
371 525 u32 min_mip = std::numeric_limits<u32>::max();
372VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source, 526 u32 max_mip = std::numeric_limits<u32>::min();
373 SwizzleSource z_source, SwizzleSource w_source) { 527 u32 min_layer = std::numeric_limits<u32>::max();
374 const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); 528 u32 max_layer = std::numeric_limits<u32>::min();
375 if (last_image_view && last_swizzle == new_swizzle) { 529
376 return last_image_view; 530 void AddLayers(const VkImageSubresourceLayers& layers) {
531 min_mip = std::min(min_mip, layers.mipLevel);
532 max_mip = std::max(max_mip, layers.mipLevel + 1);
533 min_layer = std::min(min_layer, layers.baseArrayLayer);
534 max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount);
377 } 535 }
378 last_swizzle = new_swizzle;
379 536
380 const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle); 537 VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept {
381 auto& image_view = entry->second; 538 return VkImageSubresourceRange{
382 if (!is_cache_miss) { 539 .aspectMask = aspect_mask,
383 return last_image_view = *image_view; 540 .baseMipLevel = min_mip,
541 .levelCount = max_mip - min_mip,
542 .baseArrayLayer = min_layer,
543 .layerCount = max_layer - min_layer,
544 };
384 } 545 }
546};
385 547
386 std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source), 548} // Anonymous namespace
387 MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)};
388 if (surface_params.pixel_format == PixelFormat::A1B5G5R5_UNORM) {
389 // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here.
390 std::swap(swizzle[0], swizzle[2]);
391 }
392 549
393 // Games can sample depth or stencil values on textures. This is decided by the swizzle value on 550void TextureCacheRuntime::Finish() {
394 // hardware. To emulate this on Vulkan we specify it in the aspect. 551 scheduler.Finish();
395 VkImageAspectFlags aspect = aspect_mask; 552}
396 if (aspect == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
397 UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
398 const bool is_first = x_source == SwizzleSource::R;
399 switch (surface_params.pixel_format) {
400 case PixelFormat::D24_UNORM_S8_UINT:
401 case PixelFormat::D32_FLOAT_S8_UINT:
402 aspect = is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT;
403 break;
404 case PixelFormat::S8_UINT_D24_UNORM:
405 aspect = is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
406 break;
407 default:
408 aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
409 UNIMPLEMENTED();
410 }
411 553
412 // Make sure we sample the first component 554ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
413 std::transform( 555 const auto& buffer = staging_buffer_pool.GetUnusedBuffer(size, true);
414 swizzle.begin(), swizzle.end(), swizzle.begin(), [](VkComponentSwizzle component) { 556 return ImageBufferMap{
415 return component == VK_COMPONENT_SWIZZLE_G ? VK_COMPONENT_SWIZZLE_R : component; 557 .handle = *buffer.handle,
416 }); 558 .map = buffer.commit->Map(size),
417 } 559 };
560}
418 561
419 if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { 562void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
420 ASSERT(base_slice == 0); 563 const std::array<Offset2D, 2>& dst_region,
421 ASSERT(num_slices == surface_params.depth); 564 const std::array<Offset2D, 2>& src_region,
565 Tegra::Engines::Fermi2D::Filter filter,
566 Tegra::Engines::Fermi2D::Operation operation) {
567 const VkImageAspectFlags aspect_mask = ImageAspectMask(src.format);
568 const bool is_dst_msaa = dst.Samples() != VK_SAMPLE_COUNT_1_BIT;
569 const bool is_src_msaa = src.Samples() != VK_SAMPLE_COUNT_1_BIT;
570 ASSERT(aspect_mask == ImageAspectMask(dst.format));
571 if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) {
572 blit_image_helper.BlitColor(dst_framebuffer, src, dst_region, src_region, filter,
573 operation);
574 return;
422 } 575 }
423 576 if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
424 image_view = device.GetLogical().CreateImageView({ 577 if (!device.IsBlitDepthStencilSupported()) {
425 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 578 UNIMPLEMENTED_IF(is_src_msaa || is_dst_msaa);
426 .pNext = nullptr, 579 blit_image_helper.BlitDepthStencil(dst_framebuffer, src.DepthView(), src.StencilView(),
427 .flags = 0, 580 dst_region, src_region, filter, operation);
428 .image = surface.GetImageHandle(), 581 return;
429 .viewType = image_view_type, 582 }
430 .format = surface.GetImage().GetFormat(), 583 }
431 .components = 584 ASSERT(src.ImageFormat() == dst.ImageFormat());
432 { 585 ASSERT(!(is_dst_msaa && !is_src_msaa));
433 .r = swizzle[0], 586 ASSERT(operation == Fermi2D::Operation::SrcCopy);
434 .g = swizzle[1], 587
435 .b = swizzle[2], 588 const VkImage dst_image = dst.ImageHandle();
436 .a = swizzle[3], 589 const VkImage src_image = src.ImageHandle();
590 const VkImageSubresourceLayers dst_layers = MakeSubresourceLayers(&dst);
591 const VkImageSubresourceLayers src_layers = MakeSubresourceLayers(&src);
592 const bool is_resolve = is_src_msaa && !is_dst_msaa;
593 scheduler.RequestOutsideRenderPassOperationContext();
594 scheduler.Record([filter, dst_region, src_region, dst_image, src_image, dst_layers, src_layers,
595 aspect_mask, is_resolve](vk::CommandBuffer cmdbuf) {
596 const std::array read_barriers{
597 VkImageMemoryBarrier{
598 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
599 .pNext = nullptr,
600 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT |
601 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
602 VK_ACCESS_TRANSFER_WRITE_BIT,
603 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
604 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
605 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
606 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
607 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
608 .image = src_image,
609 .subresourceRange{
610 .aspectMask = aspect_mask,
611 .baseMipLevel = 0,
612 .levelCount = VK_REMAINING_MIP_LEVELS,
613 .baseArrayLayer = 0,
614 .layerCount = VK_REMAINING_ARRAY_LAYERS,
615 },
437 }, 616 },
438 .subresourceRange = 617 VkImageMemoryBarrier{
439 { 618 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
440 .aspectMask = aspect, 619 .pNext = nullptr,
441 .baseMipLevel = base_level, 620 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT |
442 .levelCount = num_levels, 621 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
443 .baseArrayLayer = base_layer, 622 VK_ACCESS_TRANSFER_WRITE_BIT,
444 .layerCount = num_layers, 623 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
624 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
625 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
626 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
627 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
628 .image = dst_image,
629 .subresourceRange{
630 .aspectMask = aspect_mask,
631 .baseMipLevel = 0,
632 .levelCount = VK_REMAINING_MIP_LEVELS,
633 .baseArrayLayer = 0,
634 .layerCount = VK_REMAINING_ARRAY_LAYERS,
635 },
636 },
637 };
638 VkImageMemoryBarrier write_barrier{
639 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
640 .pNext = nullptr,
641 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
642 .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
643 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
644 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
645 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
646 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
647 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
648 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
649 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
650 .image = dst_image,
651 .subresourceRange{
652 .aspectMask = aspect_mask,
653 .baseMipLevel = 0,
654 .levelCount = VK_REMAINING_MIP_LEVELS,
655 .baseArrayLayer = 0,
656 .layerCount = VK_REMAINING_ARRAY_LAYERS,
445 }, 657 },
658 };
659 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
660 0, nullptr, nullptr, read_barriers);
661 if (is_resolve) {
662 cmdbuf.ResolveImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image,
663 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
664 MakeImageResolve(dst_region, src_region, dst_layers, src_layers));
665 } else {
666 const bool is_linear = filter == Fermi2D::Filter::Bilinear;
667 const VkFilter vk_filter = is_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
668 cmdbuf.BlitImage(
669 src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
670 MakeImageBlit(dst_region, src_region, dst_layers, src_layers), vk_filter);
671 }
672 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
673 0, write_barrier);
446 }); 674 });
447
448 return last_image_view = *image_view;
449} 675}
450 676
451VkImageView CachedSurfaceView::GetAttachment() { 677void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
452 if (render_target) { 678 switch (dst_view.format) {
453 return *render_target; 679 case PixelFormat::R16_UNORM:
680 if (src_view.format == PixelFormat::D16_UNORM) {
681 return blit_image_helper.ConvertD16ToR16(dst, src_view);
682 }
683 break;
684 case PixelFormat::R32_FLOAT:
685 if (src_view.format == PixelFormat::D32_FLOAT) {
686 return blit_image_helper.ConvertD32ToR32(dst, src_view);
687 }
688 break;
689 case PixelFormat::D16_UNORM:
690 if (src_view.format == PixelFormat::R16_UNORM) {
691 return blit_image_helper.ConvertR16ToD16(dst, src_view);
692 }
693 break;
694 case PixelFormat::D32_FLOAT:
695 if (src_view.format == PixelFormat::R32_FLOAT) {
696 return blit_image_helper.ConvertR32ToD32(dst, src_view);
697 }
698 break;
699 default:
700 break;
454 } 701 }
702 UNIMPLEMENTED_MSG("Unimplemented format copy from {} to {}", src_view.format, dst_view.format);
703}
455 704
456 VkImageViewCreateInfo ci{ 705void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
457 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 706 std::span<const VideoCommon::ImageCopy> copies) {
458 .pNext = nullptr, 707 std::vector<VkImageCopy> vk_copies(copies.size());
459 .flags = 0, 708 const VkImageAspectFlags aspect_mask = dst.AspectMask();
460 .image = surface.GetImageHandle(), 709 ASSERT(aspect_mask == src.AspectMask());
461 .viewType = VK_IMAGE_VIEW_TYPE_1D, 710
462 .format = surface.GetImage().GetFormat(), 711 std::ranges::transform(copies, vk_copies.begin(), [aspect_mask](const auto& copy) {
463 .components = 712 return MakeImageCopy(copy, aspect_mask);
464 { 713 });
465 .r = VK_COMPONENT_SWIZZLE_IDENTITY, 714 const VkImage dst_image = dst.Handle();
466 .g = VK_COMPONENT_SWIZZLE_IDENTITY, 715 const VkImage src_image = src.Handle();
467 .b = VK_COMPONENT_SWIZZLE_IDENTITY, 716 scheduler.RequestOutsideRenderPassOperationContext();
468 .a = VK_COMPONENT_SWIZZLE_IDENTITY, 717 scheduler.Record([dst_image, src_image, aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
718 RangedBarrierRange dst_range;
719 RangedBarrierRange src_range;
720 for (const VkImageCopy& copy : vk_copies) {
721 dst_range.AddLayers(copy.dstSubresource);
722 src_range.AddLayers(copy.srcSubresource);
723 }
724 const std::array read_barriers{
725 VkImageMemoryBarrier{
726 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
727 .pNext = nullptr,
728 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
729 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
730 VK_ACCESS_TRANSFER_WRITE_BIT,
731 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
732 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
733 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
734 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
735 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
736 .image = src_image,
737 .subresourceRange = src_range.SubresourceRange(aspect_mask),
469 }, 738 },
470 .subresourceRange = 739 VkImageMemoryBarrier{
471 { 740 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
472 .aspectMask = aspect_mask, 741 .pNext = nullptr,
473 .baseMipLevel = base_level, 742 .srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
474 .levelCount = num_levels, 743 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
475 .baseArrayLayer = 0, 744 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
476 .layerCount = 0, 745 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
746 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
747 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
748 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
749 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
750 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
751 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
752 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
753 .image = dst_image,
754 .subresourceRange = dst_range.SubresourceRange(aspect_mask),
477 }, 755 },
478 }; 756 };
479 if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { 757 const VkImageMemoryBarrier write_barrier{
480 ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; 758 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
481 ci.subresourceRange.baseArrayLayer = base_slice; 759 .pNext = nullptr,
482 ci.subresourceRange.layerCount = num_slices; 760 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
761 .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
762 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
763 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
764 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
765 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
766 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
767 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
768 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
769 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
770 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
771 .image = dst_image,
772 .subresourceRange = dst_range.SubresourceRange(aspect_mask),
773 };
774 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
775 0, {}, {}, read_barriers);
776 cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image,
777 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk_copies);
778 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
779 0, write_barrier);
780 });
781}
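CopyImage uses the RangedBarrierRange helper defined above so the barriers cover only the union of subresources the copies actually touch instead of the whole image. A standalone sketch of the accumulation:

    RangedBarrierRange range;
    range.AddLayers({.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .mipLevel = 2,
                     .baseArrayLayer = 0, .layerCount = 6});
    range.AddLayers({.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .mipLevel = 0,
                     .baseArrayLayer = 4, .layerCount = 1});
    // The merged range now spans mips [0, 3) and layers [0, 6).
    const VkImageSubresourceRange merged = range.SubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT);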
782
783Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_,
784 VAddr cpu_addr_)
785 : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler},
786 image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)),
787 aspect_mask(ImageAspectMask(info.format)) {
788 if (image) {
789 commit = runtime.memory_manager.Commit(image, false);
483 } else { 790 } else {
484 ci.viewType = image_view_type; 791 commit = runtime.memory_manager.Commit(buffer, false);
485 ci.subresourceRange.baseArrayLayer = base_layer; 792 }
486 ci.subresourceRange.layerCount = num_layers; 793 if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
794 flags |= VideoCommon::ImageFlagBits::Converted;
795 }
796 if (runtime.device.HasDebuggingToolAttached()) {
797 if (image) {
798 image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
799 } else {
800 buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
801 }
487 } 802 }
488 render_target = device.GetLogical().CreateImageView(ci);
489 return *render_target;
490} 803}
491 804
492VKTextureCache::VKTextureCache(VideoCore::RasterizerInterface& rasterizer_, 805void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
493 Tegra::Engines::Maxwell3D& maxwell3d_, 806 std::span<const BufferImageCopy> copies) {
494 Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, 807 // TODO: Move this to another API
495 VKMemoryManager& memory_manager_, VKScheduler& scheduler_, 808 scheduler->RequestOutsideRenderPassOperationContext();
496 VKStagingBufferPool& staging_pool_) 809 std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
497 : TextureCache(rasterizer_, maxwell3d_, gpu_memory_, device_.IsOptimalAstcSupported()), 810 const VkBuffer src_buffer = map.handle;
498 device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ 811 const VkImage vk_image = *image;
499 staging_pool_} {} 812 const VkImageAspectFlags vk_aspect_mask = aspect_mask;
500 813 const bool is_initialized = std::exchange(initialized, true);
501VKTextureCache::~VKTextureCache() = default; 814 scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized,
502 815 vk_copies](vk::CommandBuffer cmdbuf) {
503Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { 816 CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies);
504 return std::make_shared<CachedSurface>(device, memory_manager, scheduler, staging_pool, 817 });
505 gpu_addr, params);
506} 818}
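A typical upload under the new runtime pairs TextureCacheRuntime::MapUploadBuffer with Image::UploadMemory: the staging window is written on the CPU and the copies are recorded against its handle. A sketch, assuming total_size, decoded_pixels and copies come from the common texture cache:

    const ImageBufferMap map = runtime.MapUploadBuffer(total_size);
    std::memcpy(map.map, decoded_pixels, total_size); // fill the staging memory
    image.UploadMemory(map, /*buffer_offset=*/0, copies);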
507 819
508void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, 820void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
509 const VideoCommon::CopyParams& copy_params) { 821 std::span<const VideoCommon::BufferCopy> copies) {
510 const bool src_3d = src_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D; 822 // TODO: Move this to another API
511 const bool dst_3d = dst_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D; 823 scheduler->RequestOutsideRenderPassOperationContext();
512 UNIMPLEMENTED_IF(src_3d); 824 std::vector vk_copies = TransformBufferCopies(copies, buffer_offset);
825 const VkBuffer src_buffer = map.handle;
826 const VkBuffer dst_buffer = *buffer;
827 scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
828 // TODO: Barriers
829 cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
830 });
831}
513 832
514 // The texture cache handles depth in OpenGL terms, we have to handle it as subresource and 833void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset,
515 // dimension respectively. 834 std::span<const BufferImageCopy> copies) {
516 const u32 dst_base_layer = dst_3d ? 0 : copy_params.dest_z; 835 std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask);
517 const u32 dst_offset_z = dst_3d ? copy_params.dest_z : 0; 836 scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask,
837 vk_copies](vk::CommandBuffer cmdbuf) {
838 // TODO: Barriers
839 cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies);
840 });
841}
518 842
519 const u32 extent_z = dst_3d ? copy_params.depth : 1; 843ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
520 const u32 num_layers = dst_3d ? 1 : copy_params.depth; 844 ImageId image_id_, Image& image)
845 : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device},
846 image_handle{image.Handle()}, image_format{image.info.format}, samples{ConvertSampleCount(
847 image.info.num_samples)} {
848 const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info);
849 std::array<SwizzleSource, 4> swizzle{
850 SwizzleSource::R,
851 SwizzleSource::G,
852 SwizzleSource::B,
853 SwizzleSource::A,
854 };
855 if (!info.IsRenderTarget()) {
856 swizzle = info.Swizzle();
857 if ((aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0) {
858 std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
859 }
860 }
861 const VkFormat vk_format =
862 MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, format).format;
863 const VkImageViewCreateInfo create_info{
864 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
865 .pNext = nullptr,
866 .flags = 0,
867 .image = image.Handle(),
868 .viewType = VkImageViewType{},
869 .format = vk_format,
870 .components{
871 .r = ComponentSwizzle(swizzle[0]),
872 .g = ComponentSwizzle(swizzle[1]),
873 .b = ComponentSwizzle(swizzle[2]),
874 .a = ComponentSwizzle(swizzle[3]),
875 },
876 .subresourceRange = MakeSubresourceRange(aspect_mask, info.range),
877 };
878 const auto create = [&](VideoCommon::ImageViewType view_type, std::optional<u32> num_layers) {
879 VkImageViewCreateInfo ci{create_info};
880 ci.viewType = ImageViewType(view_type);
881 if (num_layers) {
882 ci.subresourceRange.layerCount = *num_layers;
883 }
884 vk::ImageView handle = device->GetLogical().CreateImageView(ci);
885 if (device->HasDebuggingToolAttached()) {
886 handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str());
887 }
888 image_views[static_cast<size_t>(view_type)] = std::move(handle);
889 };
890 switch (info.type) {
891 case VideoCommon::ImageViewType::e1D:
892 case VideoCommon::ImageViewType::e1DArray:
893 create(VideoCommon::ImageViewType::e1D, 1);
894 create(VideoCommon::ImageViewType::e1DArray, std::nullopt);
895 render_target = Handle(VideoCommon::ImageViewType::e1DArray);
896 break;
897 case VideoCommon::ImageViewType::e2D:
898 case VideoCommon::ImageViewType::e2DArray:
899 create(VideoCommon::ImageViewType::e2D, 1);
900 create(VideoCommon::ImageViewType::e2DArray, std::nullopt);
901 render_target = Handle(VideoCommon::ImageViewType::e2DArray);
902 break;
903 case VideoCommon::ImageViewType::e3D:
904 create(VideoCommon::ImageViewType::e3D, std::nullopt);
905 render_target = Handle(VideoCommon::ImageViewType::e3D);
906 break;
907 case VideoCommon::ImageViewType::Cube:
908 case VideoCommon::ImageViewType::CubeArray:
909 create(VideoCommon::ImageViewType::Cube, 6);
910 create(VideoCommon::ImageViewType::CubeArray, std::nullopt);
911 break;
912 case VideoCommon::ImageViewType::Rect:
913 UNIMPLEMENTED();
914 break;
915 case VideoCommon::ImageViewType::Buffer:
916 buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{
917 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
918 .pNext = nullptr,
919 .flags = 0,
920 .buffer = image.Buffer(),
921 .format = vk_format,
922 .offset = 0, // TODO: Redesign buffer cache to support this
923 .range = image.guest_size_bytes,
924 });
925 break;
926 }
927}
521
522     // We can't copy inside a renderpass
523     scheduler.RequestOutsideRenderPassOperationContext();
524
525     src_surface->Transition(copy_params.source_z, copy_params.depth, copy_params.source_level, 1,
526                             VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT,
527                             VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
528     dst_surface->Transition(dst_base_layer, num_layers, copy_params.dest_level, 1,
529                             VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
530                             VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
531
532     const VkImageCopy copy{
533         .srcSubresource =
534             {
535                 .aspectMask = src_surface->GetAspectMask(),
536                 .mipLevel = copy_params.source_level,
537                 .baseArrayLayer = copy_params.source_z,
538                 .layerCount = num_layers,
539             },
540         .srcOffset =
541             {
542                 .x = static_cast<s32>(copy_params.source_x),
543                 .y = static_cast<s32>(copy_params.source_y),
544                 .z = 0,
545             },
546         .dstSubresource =
547             {
548                 .aspectMask = dst_surface->GetAspectMask(),
549                 .mipLevel = copy_params.dest_level,
550                 .baseArrayLayer = dst_base_layer,
551                 .layerCount = num_layers,
552             },
553         .dstOffset =
554             {
555                 .x = static_cast<s32>(copy_params.dest_x),
556                 .y = static_cast<s32>(copy_params.dest_y),
557                 .z = static_cast<s32>(dst_offset_z),
558             },
559         .extent =
560             {
561                 .width = copy_params.width,
562                 .height = copy_params.height,
563                 .depth = extent_z,
564             },
565     };
566
567     const VkImage src_image = src_surface->GetImageHandle();
568     const VkImage dst_image = dst_surface->GetImageHandle();
569     scheduler.Record([src_image, dst_image, copy](vk::CommandBuffer cmdbuf) {
570         cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image,
571                          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy);
572     });
573 }
574
575 void VKTextureCache::ImageBlit(View& src_view, View& dst_view,
576                                const Tegra::Engines::Fermi2D::Config& copy_config) {
577     // We can't blit inside a renderpass
578     scheduler.RequestOutsideRenderPassOperationContext();
579
580     src_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT,
581                          VK_ACCESS_TRANSFER_READ_BIT);
582     dst_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT,
583                          VK_ACCESS_TRANSFER_WRITE_BIT);
584
585     VkImageBlit blit;
586     blit.srcSubresource = src_view->GetImageSubresourceLayers();
587     blit.srcOffsets[0].x = copy_config.src_rect.left;
588     blit.srcOffsets[0].y = copy_config.src_rect.top;
589     blit.srcOffsets[0].z = 0;
590     blit.srcOffsets[1].x = copy_config.src_rect.right;
591     blit.srcOffsets[1].y = copy_config.src_rect.bottom;
592     blit.srcOffsets[1].z = 1;
593     blit.dstSubresource = dst_view->GetImageSubresourceLayers();
594     blit.dstOffsets[0].x = copy_config.dst_rect.left;
595     blit.dstOffsets[0].y = copy_config.dst_rect.top;
596     blit.dstOffsets[0].z = 0;
597     blit.dstOffsets[1].x = copy_config.dst_rect.right;
598     blit.dstOffsets[1].y = copy_config.dst_rect.bottom;
599     blit.dstOffsets[1].z = 1;
600
601     const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
602
603     scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit,
604                       is_linear](vk::CommandBuffer cmdbuf) {
605         cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image,
606                          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit,
607                          is_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST);
608     });
609 }
610
611 void VKTextureCache::BufferCopy(Surface& src_surface, Surface& dst_surface) {
612     // Currently unimplemented. PBO copies should be dropped and we should use a render pass to
613     // convert from color to depth and vice versa.
614     LOG_WARNING(Render_Vulkan, "Unimplemented");
615 }
616
617 } // namespace Vulkan
928
929 ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params)
930     : VideoCommon::ImageViewBase{params} {}
931
932 VkImageView ImageView::DepthView() {
933     if (depth_view) {
934         return *depth_view;
935     }
936     depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT);
937     return *depth_view;
938 }
939
940 VkImageView ImageView::StencilView() {
941     if (stencil_view) {
942         return *stencil_view;
943     }
944     stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT);
945     return *stencil_view;
946 }
947
948 vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) {
949     return device->GetLogical().CreateImageView({
950         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
951         .pNext = nullptr,
952         .flags = 0,
953         .image = image_handle,
954         .viewType = ImageViewType(type),
955         .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, format).format,
956         .components{
957             .r = VK_COMPONENT_SWIZZLE_IDENTITY,
958             .g = VK_COMPONENT_SWIZZLE_IDENTITY,
959             .b = VK_COMPONENT_SWIZZLE_IDENTITY,
960             .a = VK_COMPONENT_SWIZZLE_IDENTITY,
961         },
962         .subresourceRange = MakeSubresourceRange(aspect_mask, range),
963     });
964 }
965
966 Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& tsc) {
967     const auto& device = runtime.device;
968     const bool arbitrary_borders = runtime.device.IsExtCustomBorderColorSupported();
969     const std::array<float, 4> color = tsc.BorderColor();
970     // Pre-C++20 stand-in for std::bit_cast
971     VkClearColorValue border_color;
972     std::memcpy(&border_color, &color, sizeof(color));
973     const VkSamplerCustomBorderColorCreateInfoEXT border_ci{
974         .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
975         .pNext = nullptr,
976         .customBorderColor = border_color,
977         .format = VK_FORMAT_UNDEFINED,
978     };
979     const void* pnext = nullptr;
980     if (arbitrary_borders) {
981         pnext = &border_ci;
982     }
983     const VkSamplerReductionModeCreateInfoEXT reduction_ci{
984         .sType = VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT,
985         .pNext = pnext,
986         .reductionMode = MaxwellToVK::SamplerReduction(tsc.reduction_filter),
987     };
988     if (runtime.device.IsExtSamplerFilterMinmaxSupported()) {
989         pnext = &reduction_ci;
990     } else if (reduction_ci.reductionMode != VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT) {
991         LOG_WARNING(Render_Vulkan, "VK_EXT_sampler_filter_minmax is required");
992     }
993     // Some games have samplers with garbage. Sanitize them here.
994     const float max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f);
995     sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{
996         .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
997         .pNext = pnext,
998         .flags = 0,
999         .magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter),
1000         .minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter),
1001         .mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
1002         .addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
1003         .addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
1004         .addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter),
1005         .mipLodBias = tsc.LodBias(),
1006         .anisotropyEnable = static_cast<VkBool32>(max_anisotropy > 1.0f ? VK_TRUE : VK_FALSE),
1007         .maxAnisotropy = max_anisotropy,
1008         .compareEnable = tsc.depth_compare_enabled,
1009         .compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func),
1010         .minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.MinLod(),
1011         .maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.MaxLod(),
1012         .borderColor =
1013             arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color),
1014         .unnormalizedCoordinates = VK_FALSE,
1015     });
1016 }
1017
1018 Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
1019                          ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
1020     std::vector<VkAttachmentDescription> descriptions;
1021     std::vector<VkImageView> attachments;
1022 RenderPassKey renderpass_key{};
1023 s32 num_layers = 1;
1024
1025 for (size_t index = 0; index < NUM_RT; ++index) {
1026 const ImageView* const color_buffer = color_buffers[index];
1027 if (!color_buffer) {
1028 renderpass_key.color_formats[index] = PixelFormat::Invalid;
1029 continue;
1030 }
1031 descriptions.push_back(AttachmentDescription(runtime.device, color_buffer));
1032 attachments.push_back(color_buffer->RenderTarget());
1033 renderpass_key.color_formats[index] = color_buffer->format;
1034 num_layers = std::max(num_layers, color_buffer->range.extent.layers);
1035 images[num_images] = color_buffer->ImageHandle();
1036 image_ranges[num_images] = MakeSubresourceRange(color_buffer);
1037 samples = color_buffer->Samples();
1038 ++num_images;
1039 }
1040 const size_t num_colors = attachments.size();
1041 const VkAttachmentReference* depth_attachment =
1042 depth_buffer ? &ATTACHMENT_REFERENCES[num_colors] : nullptr;
1043 if (depth_buffer) {
1044 descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer));
1045 attachments.push_back(depth_buffer->RenderTarget());
1046 renderpass_key.depth_format = depth_buffer->format;
1047 num_layers = std::max(num_layers, depth_buffer->range.extent.layers);
1048 images[num_images] = depth_buffer->ImageHandle();
1049 image_ranges[num_images] = MakeSubresourceRange(depth_buffer);
1050 samples = depth_buffer->Samples();
1051 ++num_images;
1052 } else {
1053 renderpass_key.depth_format = PixelFormat::Invalid;
1054 }
1055 renderpass_key.samples = samples;
1056
1057 const auto& device = runtime.device.GetLogical();
1058 const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key);
1059 if (is_new) {
1060 const VkSubpassDescription subpass{
1061 .flags = 0,
1062 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
1063 .inputAttachmentCount = 0,
1064 .pInputAttachments = nullptr,
1065 .colorAttachmentCount = static_cast<u32>(num_colors),
1066 .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr,
1067 .pResolveAttachments = nullptr,
1068 .pDepthStencilAttachment = depth_attachment,
1069 .preserveAttachmentCount = 0,
1070 .pPreserveAttachments = nullptr,
1071 };
1072 cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{
1073 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
1074 .pNext = nullptr,
1075 .flags = 0,
1076 .attachmentCount = static_cast<u32>(descriptions.size()),
1077 .pAttachments = descriptions.data(),
1078 .subpassCount = 1,
1079 .pSubpasses = &subpass,
1080 .dependencyCount = 0,
1081 .pDependencies = nullptr,
1082 });
1083 }
1084 renderpass = *cache_pair->second;
1085 render_area = VkExtent2D{
1086 .width = key.size.width,
1087 .height = key.size.height,
1088 };
1089 num_color_buffers = static_cast<u32>(num_colors);
1090 framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{
1091 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
1092 .pNext = nullptr,
1093 .flags = 0,
1094 .renderPass = renderpass,
1095 .attachmentCount = static_cast<u32>(attachments.size()),
1096 .pAttachments = attachments.data(),
1097 .width = key.size.width,
1098 .height = key.size.height,
1099 .layers = static_cast<u32>(num_layers),
1100 });
1101 if (runtime.device.HasDebuggingToolAttached()) {
1102 framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str());
1103 }
1104 }
1105
1106 } // namespace Vulkan
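
A side note on the Sampler constructor above: the std::memcpy through VkClearColorValue is a pre-C++20 stand-in for std::bit_cast. Once C++20 library support can be assumed, the same conversion can be written directly; a sketch, not part of this commit:

    #include <array>
    #include <bit>
    #include <vulkan/vulkan.h>

    VkClearColorValue MakeBorderColor(const std::array<float, 4>& color) {
        static_assert(sizeof(VkClearColorValue) == sizeof(color));
        // Same object representation as the memcpy, with no uninitialized object.
        return std::bit_cast<VkClearColorValue>(color);
    }
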
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index b0be4cb0f..edc3d80c0 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -4,217 +4,265 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <memory> 7#include <compare>
8#include <unordered_map> 8#include <span>
9 9
10#include "common/common_types.h"
11#include "video_core/renderer_vulkan/vk_image.h"
12#include "video_core/renderer_vulkan/vk_memory_manager.h" 10#include "video_core/renderer_vulkan/vk_memory_manager.h"
13#include "video_core/renderer_vulkan/vk_scheduler.h"
14#include "video_core/renderer_vulkan/wrapper.h" 11#include "video_core/renderer_vulkan/wrapper.h"
15#include "video_core/texture_cache/surface_base.h"
16#include "video_core/texture_cache/texture_cache.h" 12#include "video_core/texture_cache/texture_cache.h"
17 13
18namespace VideoCore {
19class RasterizerInterface;
20}
21
22namespace Vulkan { 14namespace Vulkan {
23 15
24class RasterizerVulkan; 16using VideoCommon::ImageId;
17using VideoCommon::NUM_RT;
18using VideoCommon::Offset2D;
19using VideoCommon::RenderTargets;
20using VideoCore::Surface::PixelFormat;
21
25class VKDevice; 22class VKDevice;
26class VKScheduler; 23class VKScheduler;
27class VKStagingBufferPool; 24class VKStagingBufferPool;
28 25
29class CachedSurfaceView; 26class BlitImageHelper;
30class CachedSurface; 27class Image;
28class ImageView;
29class Framebuffer;
31 30
32using Surface = std::shared_ptr<CachedSurface>; 31struct RenderPassKey {
33using View = std::shared_ptr<CachedSurfaceView>; 32 constexpr auto operator<=>(const RenderPassKey&) const noexcept = default;
34using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
35 33
36using VideoCommon::SurfaceParams; 34 std::array<PixelFormat, NUM_RT> color_formats;
37using VideoCommon::ViewParams; 35 PixelFormat depth_format;
36 VkSampleCountFlagBits samples;
37};
38 38
39class CachedSurface final : public VideoCommon::SurfaceBase<View> { 39} // namespace Vulkan
40 friend CachedSurfaceView;
41 40
42public: 41namespace std {
43 explicit CachedSurface(const VKDevice& device_, VKMemoryManager& memory_manager_, 42template <>
44 VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_, 43struct hash<Vulkan::RenderPassKey> {
45 GPUVAddr gpu_addr_, const SurfaceParams& params_); 44 [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
46 ~CachedSurface(); 45 size_t value = static_cast<size_t>(key.depth_format) << 48;
46 value ^= static_cast<size_t>(key.samples) << 52;
47 for (size_t i = 0; i < key.color_formats.size(); ++i) {
48 value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
49 }
50 return value;
51 }
52};
53} // namespace std
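
RenderPassKey packs every attachment format plus the sample count into one hashable value, and the defaulted operator<=> also generates the operator== that unordered_map needs. A hedged sketch of how the runtime's renderpass_cache consumes it (the PixelFormat enumerators are assumptions for illustration):

    #include <unordered_map>
    // Assumes the declarations from vk_texture_cache.h above are visible.

    void CacheLookupDemo(std::unordered_map<Vulkan::RenderPassKey, Vulkan::vk::RenderPass>& cache) {
        Vulkan::RenderPassKey key{};
        key.color_formats.fill(Vulkan::PixelFormat::Invalid);
        key.color_formats[0] = Vulkan::PixelFormat::A8B8G8R8_UNORM; // assumed enumerator
        key.depth_format = Vulkan::PixelFormat::D32_FLOAT;          // assumed enumerator
        key.samples = VK_SAMPLE_COUNT_1_BIT;

        const auto [it, is_new] = cache.try_emplace(key);
        if (is_new) {
            // First time this attachment combination is seen: build and store the VkRenderPass.
        }
    }
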
47 54
48 void UploadTexture(const std::vector<u8>& staging_buffer) override; 55namespace Vulkan {
49 void DownloadTexture(std::vector<u8>& staging_buffer) override;
50 56
51 void FullTransition(VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, 57struct ImageBufferMap {
52 VkImageLayout new_layout) { 58 [[nodiscard]] VkBuffer Handle() const noexcept {
53 image->Transition(0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels, 59 return handle;
54 new_stage_mask, new_access, new_layout);
55 } 60 }
56 61
57 void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, 62 [[nodiscard]] std::span<u8> Span() const noexcept {
58 VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, 63 return map.Span();
59 VkImageLayout new_layout) {
60 image->Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask,
61 new_access, new_layout);
62 } 64 }
63 65
64 VKImage& GetImage() { 66 VkBuffer handle;
65 return *image; 67 MemoryMap map;
66 } 68};
67 69
68 const VKImage& GetImage() const { 70struct TextureCacheRuntime {
69 return *image; 71 const VKDevice& device;
70 } 72 VKScheduler& scheduler;
73 VKMemoryManager& memory_manager;
74 VKStagingBufferPool& staging_buffer_pool;
75 BlitImageHelper& blit_image_helper;
76 std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache;
77
78 void Finish();
71 79
72 VkImage GetImageHandle() const { 80 [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size);
73 return *image->GetHandle(); 81
82 [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size) {
83 // TODO: Have a special function for this
84 return MapUploadBuffer(size);
74 } 85 }
75 86
76 VkImageAspectFlags GetAspectMask() const { 87 void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
77 return image->GetAspectMask(); 88 const std::array<Offset2D, 2>& dst_region,
89 const std::array<Offset2D, 2>& src_region,
90 Tegra::Engines::Fermi2D::Filter filter,
91 Tegra::Engines::Fermi2D::Operation operation);
92
93 void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
94
95 void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view);
96
97 [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept {
98 return false;
78 } 99 }
79 100
80 VkBufferView GetBufferViewHandle() const { 101 void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t,
81 return *buffer_view; 102 std::span<const VideoCommon::SwizzleParameters>) {
103 UNREACHABLE();
82 } 104 }
83 105
84protected: 106 void InsertUploadMemoryBarrier() {}
85 void DecorateSurfaceName() override; 107};
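
TextureCacheRuntime is the only channel between the generic cache and Vulkan staging memory. A sketch of the intended upload flow, assuming the headers in this diff are included (the texel buffer and copy list are supplied by the caller):

    #include <cstring>
    #include <span>

    void UploadSketch(Vulkan::TextureCacheRuntime& runtime, Vulkan::Image& image,
                      std::span<const u8> texels,
                      std::span<const VideoCommon::BufferImageCopy> copies) {
        Vulkan::ImageBufferMap map = runtime.MapUploadBuffer(texels.size());
        std::memcpy(map.Span().data(), texels.data(), texels.size());
        image.UploadMemory(map, 0, copies);
    }
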
86 108
87 View CreateView(const ViewParams& view_params) override; 109class Image : public VideoCommon::ImageBase {
110public:
111 explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
112 VAddr cpu_addr);
88 113
89private: 114 void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
90 void UploadBuffer(const std::vector<u8>& staging_buffer); 115 std::span<const VideoCommon::BufferImageCopy> copies);
91 116
92 void UploadImage(const std::vector<u8>& staging_buffer); 117 void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
118 std::span<const VideoCommon::BufferCopy> copies);
93 119
94 VkBufferImageCopy GetBufferImageCopy(u32 level) const; 120 void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset,
121 std::span<const VideoCommon::BufferImageCopy> copies);
95 122
96 VkImageSubresourceRange GetImageSubresourceRange() const; 123 [[nodiscard]] VkImage Handle() const noexcept {
124 return *image;
125 }
97 126
98 const VKDevice& device; 127 [[nodiscard]] VkBuffer Buffer() const noexcept {
99 VKMemoryManager& memory_manager; 128 return *buffer;
100 VKScheduler& scheduler; 129 }
101 VKStagingBufferPool& staging_pool; 130
131     [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept {
132 return aspect_mask;
133 }
102 134
103 std::optional<VKImage> image; 135private:
136 VKScheduler* scheduler;
137 vk::Image image;
104 vk::Buffer buffer; 138 vk::Buffer buffer;
105 vk::BufferView buffer_view;
106 VKMemoryCommit commit; 139 VKMemoryCommit commit;
107 140 VkImageAspectFlags aspect_mask = 0;
108 VkFormat format = VK_FORMAT_UNDEFINED; 141 bool initialized = false;
109}; 142};
110 143
111class CachedSurfaceView final : public VideoCommon::ViewBase { 144class ImageView : public VideoCommon::ImageViewBase {
112public: 145public:
113 explicit CachedSurfaceView(const VKDevice& device_, CachedSurface& surface_, 146 explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
114 const ViewParams& view_params_); 147 explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
115 ~CachedSurfaceView();
116 148
117 VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source, 149 [[nodiscard]] VkImageView DepthView();
118 Tegra::Texture::SwizzleSource y_source,
119 Tegra::Texture::SwizzleSource z_source,
120 Tegra::Texture::SwizzleSource w_source);
121 150
122 VkImageView GetAttachment(); 151 [[nodiscard]] VkImageView StencilView();
123 152
124 bool IsSameSurface(const CachedSurfaceView& rhs) const { 153 [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept {
125 return &surface == &rhs.surface; 154 return *image_views[static_cast<size_t>(query_type)];
126 } 155 }
127 156
128 u32 GetWidth() const { 157 [[nodiscard]] VkBufferView BufferView() const noexcept {
129 return surface_params.GetMipWidth(base_level); 158 return *buffer_view;
130 } 159 }
131 160
132 u32 GetHeight() const { 161 [[nodiscard]] VkImage ImageHandle() const noexcept {
133 return surface_params.GetMipHeight(base_level); 162 return image_handle;
134 } 163 }
135 164
136 u32 GetNumLayers() const { 165 [[nodiscard]] VkImageView RenderTarget() const noexcept {
137 return num_layers; 166 return render_target;
138 } 167 }
139 168
140 bool IsBufferView() const { 169 [[nodiscard]] PixelFormat ImageFormat() const noexcept {
141 return buffer_view; 170 return image_format;
142 } 171 }
143 172
144 VkImage GetImage() const { 173 [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
145 return image; 174 return samples;
146 } 175 }
147 176
148 VkBufferView GetBufferView() const { 177private:
149 return buffer_view; 178 [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask);
150 }
151 179
152 VkImageSubresourceRange GetImageSubresourceRange() const { 180 const VKDevice* device = nullptr;
153 return {aspect_mask, base_level, num_levels, base_layer, num_layers}; 181 std::array<vk::ImageView, VideoCommon::NUM_IMAGE_VIEW_TYPES> image_views;
154 } 182 vk::ImageView depth_view;
183 vk::ImageView stencil_view;
184 vk::BufferView buffer_view;
185 VkImage image_handle = VK_NULL_HANDLE;
186 VkImageView render_target = VK_NULL_HANDLE;
187 PixelFormat image_format = PixelFormat::Invalid;
188 VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
189};
155 190
156 VkImageSubresourceLayers GetImageSubresourceLayers() const { 191class ImageAlloc : public VideoCommon::ImageAllocBase {};
157 return {surface.GetAspectMask(), base_level, base_layer, num_layers};
158 }
159 192
160 void Transition(VkImageLayout new_layout, VkPipelineStageFlags new_stage_mask, 193class Sampler {
161 VkAccessFlags new_access) const { 194public:
162 surface.Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, 195 explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
163 new_access, new_layout);
164 }
165 196
166 void MarkAsModified(u64 tick) { 197 [[nodiscard]] VkSampler Handle() const noexcept {
167 surface.MarkAsModified(true, tick); 198 return *sampler;
168 } 199 }
169 200
170private: 201private:
171 // Store a copy of these values to avoid double dereference when reading them 202 vk::Sampler sampler;
172 const SurfaceParams surface_params;
173 const VkImage image;
174 const VkBufferView buffer_view;
175 const VkImageAspectFlags aspect_mask;
176
177 const VKDevice& device;
178 CachedSurface& surface;
179 const u32 base_level;
180 const u32 num_levels;
181 const VkImageViewType image_view_type;
182 u32 base_layer = 0;
183 u32 num_layers = 0;
184 u32 base_slice = 0;
185 u32 num_slices = 0;
186
187 VkImageView last_image_view = nullptr;
188 u32 last_swizzle = 0;
189
190 vk::ImageView render_target;
191 std::unordered_map<u32, vk::ImageView> view_cache;
192}; 203};
193 204
194class VKTextureCache final : public TextureCacheBase { 205class Framebuffer {
195public: 206public:
196 explicit VKTextureCache(VideoCore::RasterizerInterface& rasterizer_, 207 explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
197 Tegra::Engines::Maxwell3D& maxwell3d_, 208 ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
198 Tegra::MemoryManager& gpu_memory_, const VKDevice& device_,
199 VKMemoryManager& memory_manager_, VKScheduler& scheduler_,
200 VKStagingBufferPool& staging_pool_);
201 ~VKTextureCache();
202 209
203private: 210 [[nodiscard]] VkFramebuffer Handle() const noexcept {
204 Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; 211 return *framebuffer;
212 }
205 213
206 void ImageCopy(Surface& src_surface, Surface& dst_surface, 214 [[nodiscard]] VkRenderPass RenderPass() const noexcept {
207 const VideoCommon::CopyParams& copy_params) override; 215 return renderpass;
216 }
208 217
209 void ImageBlit(View& src_view, View& dst_view, 218 [[nodiscard]] VkExtent2D RenderArea() const noexcept {
210 const Tegra::Engines::Fermi2D::Config& copy_config) override; 219 return render_area;
220 }
211 221
212 void BufferCopy(Surface& src_surface, Surface& dst_surface) override; 222 [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
223 return samples;
224 }
213 225
214 const VKDevice& device; 226 [[nodiscard]] u32 NumColorBuffers() const noexcept {
215 VKMemoryManager& memory_manager; 227 return num_color_buffers;
216 VKScheduler& scheduler; 228 }
217 VKStagingBufferPool& staging_pool; 229
230 [[nodiscard]] u32 NumImages() const noexcept {
231 return num_images;
232 }
233
234 [[nodiscard]] const std::array<VkImage, 9>& Images() const noexcept {
235 return images;
236 }
237
238 [[nodiscard]] const std::array<VkImageSubresourceRange, 9>& ImageRanges() const noexcept {
239 return image_ranges;
240 }
241
242private:
243 vk::Framebuffer framebuffer;
244 VkRenderPass renderpass{};
245 VkExtent2D render_area{};
246 VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
247 u32 num_color_buffers = 0;
248 u32 num_images = 0;
249 std::array<VkImage, 9> images{};
250 std::array<VkImageSubresourceRange, 9> image_ranges{};
251};
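
Everything a renderer needs to begin a pass is exposed through the accessors above; a sketch of the expected call site (the begin-info wiring is illustrative, not from this commit):

    VkRenderPassBeginInfo MakeBeginInfo(const Vulkan::Framebuffer& framebuffer) {
        return VkRenderPassBeginInfo{
            .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
            .pNext = nullptr,
            .renderPass = framebuffer.RenderPass(),
            .framebuffer = framebuffer.Handle(),
            .renderArea = {.offset = {0, 0}, .extent = framebuffer.RenderArea()},
            .clearValueCount = 0, // clears are assumed to happen elsewhere in this sketch
            .pClearValues = nullptr,
        };
    }
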
252
253struct TextureCacheParams {
254 static constexpr bool ENABLE_VALIDATION = true;
255 static constexpr bool FRAMEBUFFER_BLITS = false;
256 static constexpr bool HAS_EMULATED_COPIES = false;
257
258 using Runtime = Vulkan::TextureCacheRuntime;
259 using Image = Vulkan::Image;
260 using ImageAlloc = Vulkan::ImageAlloc;
261 using ImageView = Vulkan::ImageView;
262 using Sampler = Vulkan::Sampler;
263 using Framebuffer = Vulkan::Framebuffer;
218}; 264};
219 265
266using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
267
220} // namespace Vulkan 268} // namespace Vulkan
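
TextureCacheParams is now the entire backend contract: VideoCommon::TextureCache pulls every concrete type (Runtime, Image, ImageView, Sampler, Framebuffer) from this traits struct at compile time, replacing the virtual CreateSurface/ImageCopy/ImageBlit overrides the deleted VKTextureCache implemented. A minimal sketch of the mechanism with a stand-in cache class (nothing below is the real VideoCommon template):

    template <class P>
    class GenericTextureCache {
        using Image = typename P::Image;
        using Sampler = typename P::Sampler;

    public:
        explicit GenericTextureCache(typename P::Runtime& runtime_) : runtime{runtime_} {}

    private:
        typename P::Runtime& runtime; // backend work is forwarded here; no virtual dispatch
    };

    // Instantiated exactly like the alias at the end of this header:
    // using TextureCache = GenericTextureCache<TextureCacheParams>;
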
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index f7e3c9821..f098a8540 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -40,30 +40,34 @@ public:
40 40
41 void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); 41 void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);
42 42
43 void AddSampledImage(VkSampler sampler, VkImageView image_view) { 43 void AddSampledImage(VkImageView image_view, VkSampler sampler) {
44 payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}}); 44 payload.emplace_back(VkDescriptorImageInfo{
45 .sampler = sampler,
46 .imageView = image_view,
47 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
48 });
45 } 49 }
46 50
47 void AddImage(VkImageView image_view) { 51 void AddImage(VkImageView image_view) {
48 payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); 52 payload.emplace_back(VkDescriptorImageInfo{
53 .sampler = VK_NULL_HANDLE,
54 .imageView = image_view,
55 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
56 });
49 } 57 }
50 58
51 void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) { 59 void AddBuffer(VkBuffer buffer, u64 offset, size_t size) {
52 payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size}); 60 payload.emplace_back(VkDescriptorBufferInfo{
61 .buffer = buffer,
62 .offset = offset,
63 .range = size,
64 });
53 } 65 }
54 66
55 void AddTexelBuffer(VkBufferView texel_buffer) { 67 void AddTexelBuffer(VkBufferView texel_buffer) {
56 payload.emplace_back(texel_buffer); 68 payload.emplace_back(texel_buffer);
57 } 69 }
58 70
59 VkImageLayout* LastImageLayout() {
60 return &payload.back().image.imageLayout;
61 }
62
63 const VkImageLayout* LastImageLayout() const {
64 return &payload.back().image.imageLayout;
65 }
66
67private: 71private:
68 const VKDevice& device; 72 const VKDevice& device;
69 VKScheduler& scheduler; 73 VKScheduler& scheduler;
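
With the image layout now fixed to VK_IMAGE_LAYOUT_GENERAL at insertion time, the LastImageLayout() back-patching hook is gone and callers simply push entries in order. A sketch of the updated usage (offset and size are illustrative):

    void PushEntries(Vulkan::VKUpdateDescriptorQueue& queue, VkImageView image_view,
                     VkSampler sampler, VkBuffer buffer,
                     VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set) {
        queue.AddSampledImage(image_view, sampler); // note the new argument order
        queue.AddBuffer(buffer, 0, 256);
        queue.Send(update_template, set);
    }
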
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp
index 1eced809e..2a21e850d 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -81,6 +81,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
81 X(vkCmdBeginQuery); 81 X(vkCmdBeginQuery);
82 X(vkCmdBeginRenderPass); 82 X(vkCmdBeginRenderPass);
83 X(vkCmdBeginTransformFeedbackEXT); 83 X(vkCmdBeginTransformFeedbackEXT);
84 X(vkCmdBeginDebugUtilsLabelEXT);
84 X(vkCmdBindDescriptorSets); 85 X(vkCmdBindDescriptorSets);
85 X(vkCmdBindIndexBuffer); 86 X(vkCmdBindIndexBuffer);
86 X(vkCmdBindPipeline); 87 X(vkCmdBindPipeline);
@@ -98,6 +99,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
98 X(vkCmdEndQuery); 99 X(vkCmdEndQuery);
99 X(vkCmdEndRenderPass); 100 X(vkCmdEndRenderPass);
100 X(vkCmdEndTransformFeedbackEXT); 101 X(vkCmdEndTransformFeedbackEXT);
102 X(vkCmdEndDebugUtilsLabelEXT);
101 X(vkCmdFillBuffer); 103 X(vkCmdFillBuffer);
102 X(vkCmdPipelineBarrier); 104 X(vkCmdPipelineBarrier);
103 X(vkCmdPushConstants); 105 X(vkCmdPushConstants);
@@ -121,6 +123,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
121 X(vkCmdSetPrimitiveTopologyEXT); 123 X(vkCmdSetPrimitiveTopologyEXT);
122 X(vkCmdSetStencilOpEXT); 124 X(vkCmdSetStencilOpEXT);
123 X(vkCmdSetStencilTestEnableEXT); 125 X(vkCmdSetStencilTestEnableEXT);
126 X(vkCmdResolveImage);
124 X(vkCreateBuffer); 127 X(vkCreateBuffer);
125 X(vkCreateBufferView); 128 X(vkCreateBufferView);
126 X(vkCreateCommandPool); 129 X(vkCreateCommandPool);
@@ -176,6 +179,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
176 X(vkQueueSubmit); 179 X(vkQueueSubmit);
177 X(vkResetFences); 180 X(vkResetFences);
178 X(vkResetQueryPoolEXT); 181 X(vkResetQueryPoolEXT);
182 X(vkSetDebugUtilsObjectNameEXT);
183 X(vkSetDebugUtilsObjectTagEXT);
179 X(vkUnmapMemory); 184 X(vkUnmapMemory);
180 X(vkUpdateDescriptorSetWithTemplateKHR); 185 X(vkUpdateDescriptorSetWithTemplateKHR);
181 X(vkUpdateDescriptorSets); 186 X(vkUpdateDescriptorSets);
@@ -184,6 +189,19 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
184#undef X 189#undef X
185} 190}
186 191
192template <typename T>
193void SetObjectName(const DeviceDispatch* dld, VkDevice device, T handle, VkObjectType type,
194 const char* name) {
195 const VkDebugUtilsObjectNameInfoEXT name_info{
196 .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
197 .pNext = nullptr,
198        .objectType = type, // use the caller-supplied type instead of hardcoding VK_OBJECT_TYPE_IMAGE
199 .objectHandle = reinterpret_cast<u64>(handle),
200 .pObjectName = name,
201 };
202 Check(dld->vkSetDebugUtilsObjectNameEXT(device, &name_info));
203}
204
187} // Anonymous namespace 205} // Anonymous namespace
188 206
189bool Load(InstanceDispatch& dld) noexcept { 207bool Load(InstanceDispatch& dld) noexcept {
@@ -476,8 +494,7 @@ DebugCallback Instance::TryCreateDebugCallback(
476 VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | 494 VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
477 VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT, 495 VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT,
478 .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | 496 .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
479 VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | 497 VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT,
480 VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
481 .pfnUserCallback = callback, 498 .pfnUserCallback = callback,
482 .pUserData = nullptr, 499 .pUserData = nullptr,
483 }; 500 };
@@ -493,10 +510,38 @@ void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
493 Check(dld->vkBindBufferMemory(owner, handle, memory, offset)); 510 Check(dld->vkBindBufferMemory(owner, handle, memory, offset));
494} 511}
495 512
513void Buffer::SetObjectNameEXT(const char* name) const {
514 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER, name);
515}
516
517void BufferView::SetObjectNameEXT(const char* name) const {
518 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER_VIEW, name);
519}
520
496void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { 521void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
497 Check(dld->vkBindImageMemory(owner, handle, memory, offset)); 522 Check(dld->vkBindImageMemory(owner, handle, memory, offset));
498} 523}
499 524
525void Image::SetObjectNameEXT(const char* name) const {
526 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name);
527}
528
529void ImageView::SetObjectNameEXT(const char* name) const {
530 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name);
531}
532
533void DeviceMemory::SetObjectNameEXT(const char* name) const {
534 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name);
535}
536
537void Fence::SetObjectNameEXT(const char* name) const {
538 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FENCE, name);
539}
540
541void Framebuffer::SetObjectNameEXT(const char* name) const {
542 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_FRAMEBUFFER, name);
543}
544
500DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const { 545DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const {
501 const std::size_t num = ai.descriptorSetCount; 546 const std::size_t num = ai.descriptorSetCount;
502 std::unique_ptr sets = std::make_unique<VkDescriptorSet[]>(num); 547 std::unique_ptr sets = std::make_unique<VkDescriptorSet[]>(num);
@@ -510,6 +555,10 @@ DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) c
510 } 555 }
511} 556}
512 557
558void DescriptorPool::SetObjectNameEXT(const char* name) const {
559 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DESCRIPTOR_POOL, name);
560}
561
513CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const { 562CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const {
514 const VkCommandBufferAllocateInfo ai{ 563 const VkCommandBufferAllocateInfo ai{
515 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, 564 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
@@ -530,6 +579,10 @@ CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLev
530 } 579 }
531} 580}
532 581
582void CommandPool::SetObjectNameEXT(const char* name) const {
583 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_COMMAND_POOL, name);
584}
585
533std::vector<VkImage> SwapchainKHR::GetImages() const { 586std::vector<VkImage> SwapchainKHR::GetImages() const {
534 u32 num; 587 u32 num;
535 Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, nullptr)); 588 Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, nullptr));
@@ -538,6 +591,18 @@ std::vector<VkImage> SwapchainKHR::GetImages() const {
538 return images; 591 return images;
539} 592}
540 593
594void Event::SetObjectNameEXT(const char* name) const {
595 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_EVENT, name);
596}
597
598void ShaderModule::SetObjectNameEXT(const char* name) const {
599 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SHADER_MODULE, name);
600}
601
602void Semaphore::SetObjectNameEXT(const char* name) const {
603 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SEMAPHORE, name);
604}
605
541Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, 606Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci,
542 Span<const char*> enabled_extensions, const void* next, 607 Span<const char*> enabled_extensions, const void* next,
543 DeviceDispatch& dispatch) noexcept { 608 DeviceDispatch& dispatch) noexcept {
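
Each wrapper gains the same one-line SetObjectNameEXT body through the SetObjectName helper above; a sketch of how the texture cache uses it (label strings are illustrative):

    void NameObjects(const Vulkan::VKDevice& device, const Vulkan::vk::Image& image,
                     const Vulkan::vk::Framebuffer& framebuffer) {
        if (!device.HasDebuggingToolAttached()) {
            return; // skip the driver call when no tool will display the names
        }
        image.SetObjectNameEXT("TextureCache image");
        framebuffer.SetObjectNameEXT("TextureCache framebuffer");
    }
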
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h
index 76f790eab..f9a184e00 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -9,6 +9,7 @@
9#include <limits> 9#include <limits>
10#include <memory> 10#include <memory>
11#include <optional> 11#include <optional>
12#include <span>
12#include <type_traits> 13#include <type_traits>
13#include <utility> 14#include <utility>
14#include <vector> 15#include <vector>
@@ -18,6 +19,10 @@
18 19
19#include "common/common_types.h" 20#include "common/common_types.h"
20 21
22#ifdef _MSC_VER
23#pragma warning(disable : 26812) // Disable prefer enum class over enum
24#endif
25
21namespace Vulkan::vk { 26namespace Vulkan::vk {
22 27
23/** 28/**
@@ -41,6 +46,9 @@ public:
41 /// Construct an empty span. 46 /// Construct an empty span.
42 constexpr Span() noexcept = default; 47 constexpr Span() noexcept = default;
43 48
49 /// Construct an empty span
50 constexpr Span(std::nullptr_t) noexcept {}
51
44 /// Construct a span from a single element. 52 /// Construct a span from a single element.
45 constexpr Span(const T& value) noexcept : ptr{&value}, num{1} {} 53 constexpr Span(const T& value) noexcept : ptr{&value}, num{1} {}
46 54
@@ -177,6 +185,7 @@ struct DeviceDispatch : public InstanceDispatch {
177 PFN_vkCmdBeginQuery vkCmdBeginQuery; 185 PFN_vkCmdBeginQuery vkCmdBeginQuery;
178 PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass; 186 PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass;
179 PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT; 187 PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT;
188 PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT;
180 PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets; 189 PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets;
181 PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer; 190 PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer;
182 PFN_vkCmdBindPipeline vkCmdBindPipeline; 191 PFN_vkCmdBindPipeline vkCmdBindPipeline;
@@ -194,6 +203,7 @@ struct DeviceDispatch : public InstanceDispatch {
194 PFN_vkCmdEndQuery vkCmdEndQuery; 203 PFN_vkCmdEndQuery vkCmdEndQuery;
195 PFN_vkCmdEndRenderPass vkCmdEndRenderPass; 204 PFN_vkCmdEndRenderPass vkCmdEndRenderPass;
196 PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT; 205 PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT;
206 PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT;
197 PFN_vkCmdFillBuffer vkCmdFillBuffer; 207 PFN_vkCmdFillBuffer vkCmdFillBuffer;
198 PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier; 208 PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier;
199 PFN_vkCmdPushConstants vkCmdPushConstants; 209 PFN_vkCmdPushConstants vkCmdPushConstants;
@@ -217,6 +227,7 @@ struct DeviceDispatch : public InstanceDispatch {
217 PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT; 227 PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT;
218 PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT; 228 PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT;
219 PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT; 229 PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT;
230 PFN_vkCmdResolveImage vkCmdResolveImage;
220 PFN_vkCreateBuffer vkCreateBuffer; 231 PFN_vkCreateBuffer vkCreateBuffer;
221 PFN_vkCreateBufferView vkCreateBufferView; 232 PFN_vkCreateBufferView vkCreateBufferView;
222 PFN_vkCreateCommandPool vkCreateCommandPool; 233 PFN_vkCreateCommandPool vkCreateCommandPool;
@@ -272,6 +283,8 @@ struct DeviceDispatch : public InstanceDispatch {
272 PFN_vkQueueSubmit vkQueueSubmit; 283 PFN_vkQueueSubmit vkQueueSubmit;
273 PFN_vkResetFences vkResetFences; 284 PFN_vkResetFences vkResetFences;
274 PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT; 285 PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT;
286 PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectNameEXT;
287 PFN_vkSetDebugUtilsObjectTagEXT vkSetDebugUtilsObjectTagEXT;
275 PFN_vkUnmapMemory vkUnmapMemory; 288 PFN_vkUnmapMemory vkUnmapMemory;
276 PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR; 289 PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR;
277 PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets; 290 PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets;
@@ -542,18 +555,14 @@ private:
542 const DeviceDispatch* dld = nullptr; 555 const DeviceDispatch* dld = nullptr;
543}; 556};
544 557
545using BufferView = Handle<VkBufferView, VkDevice, DeviceDispatch>;
546using DebugCallback = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>; 558using DebugCallback = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>;
547using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>; 559using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>;
548using DescriptorUpdateTemplateKHR = Handle<VkDescriptorUpdateTemplateKHR, VkDevice, DeviceDispatch>; 560using DescriptorUpdateTemplateKHR = Handle<VkDescriptorUpdateTemplateKHR, VkDevice, DeviceDispatch>;
549using Framebuffer = Handle<VkFramebuffer, VkDevice, DeviceDispatch>;
550using ImageView = Handle<VkImageView, VkDevice, DeviceDispatch>;
551using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>; 561using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>;
552using PipelineLayout = Handle<VkPipelineLayout, VkDevice, DeviceDispatch>; 562using PipelineLayout = Handle<VkPipelineLayout, VkDevice, DeviceDispatch>;
553using QueryPool = Handle<VkQueryPool, VkDevice, DeviceDispatch>; 563using QueryPool = Handle<VkQueryPool, VkDevice, DeviceDispatch>;
554using RenderPass = Handle<VkRenderPass, VkDevice, DeviceDispatch>; 564using RenderPass = Handle<VkRenderPass, VkDevice, DeviceDispatch>;
555using Sampler = Handle<VkSampler, VkDevice, DeviceDispatch>; 565using Sampler = Handle<VkSampler, VkDevice, DeviceDispatch>;
556using ShaderModule = Handle<VkShaderModule, VkDevice, DeviceDispatch>;
557using SurfaceKHR = Handle<VkSurfaceKHR, VkInstance, InstanceDispatch>; 566using SurfaceKHR = Handle<VkSurfaceKHR, VkInstance, InstanceDispatch>;
558 567
559using DescriptorSets = PoolAllocations<VkDescriptorSet, VkDescriptorPool>; 568using DescriptorSets = PoolAllocations<VkDescriptorSet, VkDescriptorPool>;
@@ -605,6 +614,17 @@ class Buffer : public Handle<VkBuffer, VkDevice, DeviceDispatch> {
605public: 614public:
606 /// Attaches a memory allocation. 615 /// Attaches a memory allocation.
607 void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; 616 void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const;
617
618 /// Set object name.
619 void SetObjectNameEXT(const char* name) const;
620};
621
622class BufferView : public Handle<VkBufferView, VkDevice, DeviceDispatch> {
623 using Handle<VkBufferView, VkDevice, DeviceDispatch>::Handle;
624
625public:
626 /// Set object name.
627 void SetObjectNameEXT(const char* name) const;
608}; 628};
609 629
610class Image : public Handle<VkImage, VkDevice, DeviceDispatch> { 630class Image : public Handle<VkImage, VkDevice, DeviceDispatch> {
@@ -613,12 +633,26 @@ class Image : public Handle<VkImage, VkDevice, DeviceDispatch> {
613public: 633public:
614 /// Attaches a memory allocation. 634 /// Attaches a memory allocation.
615 void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; 635 void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const;
636
637 /// Set object name.
638 void SetObjectNameEXT(const char* name) const;
639};
640
641class ImageView : public Handle<VkImageView, VkDevice, DeviceDispatch> {
642 using Handle<VkImageView, VkDevice, DeviceDispatch>::Handle;
643
644public:
645 /// Set object name.
646 void SetObjectNameEXT(const char* name) const;
616}; 647};
617 648
618class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> { 649class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> {
619 using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle; 650 using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle;
620 651
621public: 652public:
653 /// Set object name.
654 void SetObjectNameEXT(const char* name) const;
655
622 u8* Map(VkDeviceSize offset, VkDeviceSize size) const { 656 u8* Map(VkDeviceSize offset, VkDeviceSize size) const {
623 void* data; 657 void* data;
624 Check(dld->vkMapMemory(owner, handle, offset, size, 0, &data)); 658 Check(dld->vkMapMemory(owner, handle, offset, size, 0, &data));
@@ -634,6 +668,9 @@ class Fence : public Handle<VkFence, VkDevice, DeviceDispatch> {
634 using Handle<VkFence, VkDevice, DeviceDispatch>::Handle; 668 using Handle<VkFence, VkDevice, DeviceDispatch>::Handle;
635 669
636public: 670public:
671 /// Set object name.
672 void SetObjectNameEXT(const char* name) const;
673
637 VkResult Wait(u64 timeout = std::numeric_limits<u64>::max()) const noexcept { 674 VkResult Wait(u64 timeout = std::numeric_limits<u64>::max()) const noexcept {
638 return dld->vkWaitForFences(owner, 1, &handle, true, timeout); 675 return dld->vkWaitForFences(owner, 1, &handle, true, timeout);
639 } 676 }
@@ -647,11 +684,22 @@ public:
647 } 684 }
648}; 685};
649 686
687class Framebuffer : public Handle<VkFramebuffer, VkDevice, DeviceDispatch> {
688 using Handle<VkFramebuffer, VkDevice, DeviceDispatch>::Handle;
689
690public:
691 /// Set object name.
692 void SetObjectNameEXT(const char* name) const;
693};
694
650class DescriptorPool : public Handle<VkDescriptorPool, VkDevice, DeviceDispatch> { 695class DescriptorPool : public Handle<VkDescriptorPool, VkDevice, DeviceDispatch> {
651 using Handle<VkDescriptorPool, VkDevice, DeviceDispatch>::Handle; 696 using Handle<VkDescriptorPool, VkDevice, DeviceDispatch>::Handle;
652 697
653public: 698public:
654 DescriptorSets Allocate(const VkDescriptorSetAllocateInfo& ai) const; 699 DescriptorSets Allocate(const VkDescriptorSetAllocateInfo& ai) const;
700
701 /// Set object name.
702 void SetObjectNameEXT(const char* name) const;
655}; 703};
656 704
657class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> { 705class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> {
@@ -660,6 +708,9 @@ class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> {
660public: 708public:
661 CommandBuffers Allocate(std::size_t num_buffers, 709 CommandBuffers Allocate(std::size_t num_buffers,
662 VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const; 710 VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const;
711
712 /// Set object name.
713 void SetObjectNameEXT(const char* name) const;
663}; 714};
664 715
665class SwapchainKHR : public Handle<VkSwapchainKHR, VkDevice, DeviceDispatch> { 716class SwapchainKHR : public Handle<VkSwapchainKHR, VkDevice, DeviceDispatch> {
@@ -673,15 +724,29 @@ class Event : public Handle<VkEvent, VkDevice, DeviceDispatch> {
673 using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle; 724 using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle;
674 725
675public: 726public:
727 /// Set object name.
728 void SetObjectNameEXT(const char* name) const;
729
676 VkResult GetStatus() const noexcept { 730 VkResult GetStatus() const noexcept {
677 return dld->vkGetEventStatus(owner, handle); 731 return dld->vkGetEventStatus(owner, handle);
678 } 732 }
679}; 733};
680 734
735class ShaderModule : public Handle<VkShaderModule, VkDevice, DeviceDispatch> {
736 using Handle<VkShaderModule, VkDevice, DeviceDispatch>::Handle;
737
738public:
739 /// Set object name.
740 void SetObjectNameEXT(const char* name) const;
741};
742
681class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> { 743class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> {
682 using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle; 744 using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle;
683 745
684public: 746public:
747 /// Set object name.
748 void SetObjectNameEXT(const char* name) const;
749
685 [[nodiscard]] u64 GetCounter() const { 750 [[nodiscard]] u64 GetCounter() const {
686 u64 value; 751 u64 value;
687 Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &value)); 752 Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &value));
@@ -932,6 +997,12 @@ public:
932 regions.data(), filter); 997 regions.data(), filter);
933 } 998 }
934 999
1000 void ResolveImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image,
1001 VkImageLayout dst_layout, Span<VkImageResolve> regions) {
1002 dld->vkCmdResolveImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(),
1003 regions.data());
1004 }
1005
935 void Dispatch(u32 x, u32 y, u32 z) const noexcept { 1006 void Dispatch(u32 x, u32 y, u32 z) const noexcept {
936 dld->vkCmdDispatch(handle, x, y, z); 1007 dld->vkCmdDispatch(handle, x, y, z);
937 } 1008 }
@@ -946,6 +1017,23 @@ public:
946 image_barriers.size(), image_barriers.data()); 1017 image_barriers.size(), image_barriers.data());
947 } 1018 }
948 1019
1020 void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
1021 VkDependencyFlags dependency_flags = 0) const noexcept {
1022 PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, {});
1023 }
1024
1025 void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
1026 VkDependencyFlags dependency_flags,
1027 const VkBufferMemoryBarrier& buffer_barrier) const noexcept {
1028 PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, buffer_barrier, {});
1029 }
1030
1031 void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
1032 VkDependencyFlags dependency_flags,
1033 const VkImageMemoryBarrier& image_barrier) const noexcept {
1034 PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, {}, image_barrier);
1035 }
1036
949 void CopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, VkImageLayout dst_image_layout, 1037 void CopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, VkImageLayout dst_image_layout,
950 Span<VkBufferImageCopy> regions) const noexcept { 1038 Span<VkBufferImageCopy> regions) const noexcept {
951 dld->vkCmdCopyBufferToImage(handle, src_buffer, dst_image, dst_image_layout, regions.size(), 1039 dld->vkCmdCopyBufferToImage(handle, src_buffer, dst_image, dst_image_layout, regions.size(),
@@ -979,6 +1067,13 @@ public:
979 dld->vkCmdPushConstants(handle, layout, flags, offset, size, values); 1067 dld->vkCmdPushConstants(handle, layout, flags, offset, size, values);
980 } 1068 }
981 1069
1070 template <typename T>
1071 void PushConstants(VkPipelineLayout layout, VkShaderStageFlags flags,
1072 const T& data) const noexcept {
1073 static_assert(std::is_trivially_copyable_v<T>, "<data> is not trivially copyable");
1074 dld->vkCmdPushConstants(handle, layout, flags, 0, static_cast<u32>(sizeof(T)), &data);
1075 }
1076
982 void SetViewport(u32 first, Span<VkViewport> viewports) const noexcept { 1077 void SetViewport(u32 first, Span<VkViewport> viewports) const noexcept {
983 dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data()); 1078 dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data());
984 } 1079 }
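
The two hunks above add single-barrier PipelineBarrier overloads and a typed PushConstants helper, so the common cases lose their boilerplate. A sketch of both (the barrier contents and push-constant layout are left to the caller):

    void RecordSketch(const Vulkan::vk::CommandBuffer& cmdbuf, const VkImageMemoryBarrier& barrier,
                      VkPipelineLayout layout) {
        // Single-barrier overload: no empty spans for the unused barrier kinds.
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
                               VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, barrier);

        struct PushBlock {
            float scale[2];
            float offset[2];
        }; // illustrative layout; any trivially copyable type works
        // Typed overload: offset 0 and the size are derived from the struct itself.
        cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, PushBlock{});
    }
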
@@ -1088,6 +1183,20 @@ public:
1088 counter_buffers, counter_buffer_offsets); 1183 counter_buffers, counter_buffer_offsets);
1089 } 1184 }
1090 1185
1186 void BeginDebugUtilsLabelEXT(const char* label, std::span<float, 4> color) const noexcept {
1187 const VkDebugUtilsLabelEXT label_info{
1188 .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
1189 .pNext = nullptr,
1190 .pLabelName = label,
1191 .color{color[0], color[1], color[2], color[3]},
1192 };
1193 dld->vkCmdBeginDebugUtilsLabelEXT(handle, &label_info);
1194 }
1195
1196 void EndDebugUtilsLabelEXT() const noexcept {
1197 dld->vkCmdEndDebugUtilsLabelEXT(handle);
1198 }
1199
1091private: 1200private:
1092 VkCommandBuffer handle; 1201 VkCommandBuffer handle;
1093 const DeviceDispatch* dld; 1202 const DeviceDispatch* dld;
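
The new debug-label entry points bracket related commands so they group together in a capture; a sketch of typical use around a cache operation (label text and color are illustrative):

    #include <array>

    void RecordLabeled(const Vulkan::vk::CommandBuffer& cmdbuf) {
        std::array<float, 4> color{0.9f, 0.5f, 0.1f, 1.0f};
        cmdbuf.BeginDebugUtilsLabelEXT("Texture cache: image copy", color);
        // ... record the labeled commands here ...
        cmdbuf.EndDebugUtilsLabelEXT();
    }
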
diff --git a/src/video_core/sampler_cache.cpp b/src/video_core/sampler_cache.cpp
deleted file mode 100644
index 53c7ef12d..000000000
--- a/src/video_core/sampler_cache.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/cityhash.h"
6#include "common/common_types.h"
7#include "video_core/sampler_cache.h"
8
9namespace VideoCommon {
10
11std::size_t SamplerCacheKey::Hash() const {
12 static_assert(sizeof(raw) % sizeof(u64) == 0);
13 return static_cast<std::size_t>(
14 Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64)));
15}
16
17bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const {
18 return raw == rhs.raw;
19}
20
21} // namespace VideoCommon
diff --git a/src/video_core/sampler_cache.h b/src/video_core/sampler_cache.h
deleted file mode 100644
index cbe3ad071..000000000
--- a/src/video_core/sampler_cache.h
+++ /dev/null
@@ -1,60 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8#include <unordered_map>
9
10#include "video_core/textures/texture.h"
11
12namespace VideoCommon {
13
14struct SamplerCacheKey final : public Tegra::Texture::TSCEntry {
15 std::size_t Hash() const;
16
17 bool operator==(const SamplerCacheKey& rhs) const;
18
19 bool operator!=(const SamplerCacheKey& rhs) const {
20 return !operator==(rhs);
21 }
22};
23
24} // namespace VideoCommon
25
26namespace std {
27
28template <>
29struct hash<VideoCommon::SamplerCacheKey> {
30 std::size_t operator()(const VideoCommon::SamplerCacheKey& k) const noexcept {
31 return k.Hash();
32 }
33};
34
35} // namespace std
36
37namespace VideoCommon {
38
39template <typename SamplerType, typename SamplerStorageType>
40class SamplerCache {
41public:
42 SamplerType GetSampler(const Tegra::Texture::TSCEntry& tsc) {
43 const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc});
44 auto& sampler = entry->second;
45 if (is_cache_miss) {
46 sampler = CreateSampler(tsc);
47 }
48 return ToSamplerType(sampler);
49 }
50
51protected:
52 virtual SamplerStorageType CreateSampler(const Tegra::Texture::TSCEntry& tsc) const = 0;
53
54 virtual SamplerType ToSamplerType(const SamplerStorageType& sampler) const = 0;
55
56private:
57 std::unordered_map<SamplerCacheKey, SamplerStorageType> cache;
58};
59
60} // namespace VideoCommon
\ No newline at end of file
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index 78245473c..09f93463b 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -137,10 +137,9 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
137 const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler, 137 const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler,
138 Vulkan::VKDescriptorPool& descriptor_pool, 138 Vulkan::VKDescriptorPool& descriptor_pool,
139 Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, 139 Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
140 Vulkan::VKRenderPassCache& renderpass_cache,
141 std::vector<VkDescriptorSetLayoutBinding> bindings, 140 std::vector<VkDescriptorSetLayoutBinding> bindings,
142 Vulkan::SPIRVProgram program, 141 Vulkan::SPIRVProgram program,
143 Vulkan::GraphicsPipelineCacheKey key) { 142 Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) {
144 std::unique_lock lock(queue_mutex); 143 std::unique_lock lock(queue_mutex);
145 pending_queue.push({ 144 pending_queue.push({
146 .backend = Backend::Vulkan, 145 .backend = Backend::Vulkan,
@@ -149,10 +148,10 @@ void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
149 .scheduler = &scheduler, 148 .scheduler = &scheduler,
150 .descriptor_pool = &descriptor_pool, 149 .descriptor_pool = &descriptor_pool,
151 .update_descriptor_queue = &update_descriptor_queue, 150 .update_descriptor_queue = &update_descriptor_queue,
152 .renderpass_cache = &renderpass_cache,
153 .bindings = std::move(bindings), 151 .bindings = std::move(bindings),
154 .program = std::move(program), 152 .program = std::move(program),
155 .key = key, 153 .key = key,
154 .num_color_buffers = num_color_buffers,
156 }); 155 });
157 cv.notify_one(); 156 cv.notify_one();
158} 157}
@@ -205,8 +204,8 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
205 } else if (work.backend == Backend::Vulkan) { 204 } else if (work.backend == Backend::Vulkan) {
206 auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>( 205 auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
207 *work.vk_device, *work.scheduler, *work.descriptor_pool, 206 *work.vk_device, *work.scheduler, *work.descriptor_pool,
208 *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings, 207 *work.update_descriptor_queue, work.key, work.bindings, work.program,
209 work.program); 208 work.num_color_buffers);
210 209
211 work.pp_cache->EmplacePipeline(std::move(pipeline)); 210 work.pp_cache->EmplacePipeline(std::move(pipeline));
212 } 211 }
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
index 5a7216019..004e214a8 100644
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@@ -98,9 +98,9 @@ public:
98 Vulkan::VKScheduler& scheduler, 98 Vulkan::VKScheduler& scheduler,
99 Vulkan::VKDescriptorPool& descriptor_pool, 99 Vulkan::VKDescriptorPool& descriptor_pool,
100 Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, 100 Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
101 Vulkan::VKRenderPassCache& renderpass_cache,
102 std::vector<VkDescriptorSetLayoutBinding> bindings, 101 std::vector<VkDescriptorSetLayoutBinding> bindings,
103 Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key); 102 Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key,
103 u32 num_color_buffers);
104 104
105private: 105private:
106 void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); 106 void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
@@ -127,10 +127,10 @@ private:
127 Vulkan::VKScheduler* scheduler; 127 Vulkan::VKScheduler* scheduler;
128 Vulkan::VKDescriptorPool* descriptor_pool; 128 Vulkan::VKDescriptorPool* descriptor_pool;
129 Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; 129 Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
130 Vulkan::VKRenderPassCache* renderpass_cache;
131 std::vector<VkDescriptorSetLayoutBinding> bindings; 130 std::vector<VkDescriptorSetLayoutBinding> bindings;
132 Vulkan::SPIRVProgram program; 131 Vulkan::SPIRVProgram program;
133 Vulkan::GraphicsPipelineCacheKey key; 132 Vulkan::GraphicsPipelineCacheKey key;
133 u32 num_color_buffers;
134 }; 134 };
135 135
136 std::condition_variable cv; 136 std::condition_variable cv;
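
Queueing a Vulkan shader now passes the color buffer count instead of a render pass cache reference; a sketch of the updated call, with the namespace qualification of AsyncShaders and all argument values assumed for illustration:

    void QueueSketch(VideoCommon::Shader::AsyncShaders& async_shaders,
                     Vulkan::VKPipelineCache& pipeline_cache, const Vulkan::VKDevice& device,
                     Vulkan::VKScheduler& scheduler, Vulkan::VKDescriptorPool& descriptor_pool,
                     Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
                     std::vector<VkDescriptorSetLayoutBinding> bindings,
                     Vulkan::SPIRVProgram program, const Vulkan::GraphicsPipelineCacheKey& key) {
        async_shaders.QueueVulkanShader(&pipeline_cache, device, scheduler, descriptor_pool,
                                        update_descriptor_queue, std::move(bindings),
                                        std::move(program), key, /*num_color_buffers=*/1);
    }
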
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index ab14c1aa3..6576d1208 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -25,7 +25,7 @@ using Tegra::Shader::OpCode;
25namespace { 25namespace {
26 26
27void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, 27void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
28 const std::list<Sampler>& used_samplers) { 28 const std::list<SamplerEntry>& used_samplers) {
29 if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { 29 if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
30 return; 30 return;
31 } 31 }
@@ -43,9 +43,9 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
43 } 43 }
44} 44}
45 45
46std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, 46std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce,
47 VideoCore::GuestDriverProfile& gpu_driver, 47 VideoCore::GuestDriverProfile& gpu_driver,
48 const std::list<Sampler>& used_samplers) { 48 const std::list<SamplerEntry>& used_samplers) {
49 const u32 base_offset = sampler_to_deduce.offset; 49 const u32 base_offset = sampler_to_deduce.offset;
50 u32 max_offset{std::numeric_limits<u32>::max()}; 50 u32 max_offset{std::numeric_limits<u32>::max()};
51 for (const auto& sampler : used_samplers) { 51 for (const auto& sampler : used_samplers) {
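Note: only the head of TryDeduceSamplerSize is visible in this hunk. Assuming the usual offset-gap bound (Entry below is an invented stand-in for SamplerEntry), the deduction amounts to finding the closest used offset above the target:

#include <algorithm>
#include <cstdint>
#include <limits>
#include <list>
#include <optional>

struct Entry {
    uint32_t offset;
};

std::optional<uint32_t> DeduceSize(const Entry& target, const std::list<Entry>& used) {
    const uint32_t base_offset = target.offset;
    uint32_t max_offset = std::numeric_limits<uint32_t>::max();
    for (const Entry& entry : used) {
        if (entry.offset > base_offset) {
            max_offset = std::min(max_offset, entry.offset); // closest offset above
        }
    }
    if (max_offset == std::numeric_limits<uint32_t>::max()) {
        return std::nullopt; // no entry above: the size cannot be bounded
    }
    return max_offset - base_offset;
}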
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 532f66d27..5470e8cf4 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -497,11 +497,12 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
497 return pc; 497 return pc;
498} 498}
499 499
500Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { 500ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
501 const auto offset = static_cast<u32>(image.index.Value()); 501 const auto offset = static_cast<u32>(image.index.Value());
502 502
503 const auto it = std::find_if(std::begin(used_images), std::end(used_images), 503 const auto it =
504 [offset](const Image& entry) { return entry.offset == offset; }); 504 std::find_if(std::begin(used_images), std::end(used_images),
505 [offset](const ImageEntry& entry) { return entry.offset == offset; });
505 if (it != std::end(used_images)) { 506 if (it != std::end(used_images)) {
506 ASSERT(!it->is_bindless && it->type == type); 507 ASSERT(!it->is_bindless && it->type == type);
507 return *it; 508 return *it;
@@ -511,7 +512,7 @@ Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType t
511 return used_images.emplace_back(next_index, offset, type); 512 return used_images.emplace_back(next_index, offset, type);
512} 513}
513 514
514Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { 515ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
515 const Node image_register = GetRegister(reg); 516 const Node image_register = GetRegister(reg);
516 const auto result = 517 const auto result =
517 TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size())); 518 TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()));
@@ -520,7 +521,7 @@ Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::Im
520 const auto offset = std::get<2>(result); 521 const auto offset = std::get<2>(result);
521 522
522 const auto it = std::find_if(std::begin(used_images), std::end(used_images), 523 const auto it = std::find_if(std::begin(used_images), std::end(used_images),
523 [buffer, offset](const Image& entry) { 524 [buffer, offset](const ImageEntry& entry) {
524 return entry.buffer == buffer && entry.offset == offset; 525 return entry.buffer == buffer && entry.offset == offset;
525 }); 526 });
526 if (it != std::end(used_images)) { 527 if (it != std::end(used_images)) {
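Note: the retyped lambdas above all serve the same find-or-create lookup. A minimal sketch with an invented Entry type:

#include <algorithm>
#include <cstdint>
#include <list>

struct Entry {
    uint32_t index;
    uint32_t offset;
};

Entry& GetOrCreate(std::list<Entry>& used, uint32_t offset) {
    const auto it = std::find_if(used.begin(), used.end(),
                                 [offset](const Entry& e) { return e.offset == offset; });
    if (it != used.end()) {
        return *it; // reuse the existing mapping, as GetImage does
    }
    const auto next_index = static_cast<uint32_t>(used.size());
    return used.emplace_back(Entry{next_index, offset});
}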
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index fb18f631f..833fa2a39 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -141,7 +141,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
141 141
142 SamplerInfo info; 142 SamplerInfo info;
143 info.is_shadow = is_depth_compare; 143 info.is_shadow = is_depth_compare;
144 const std::optional<Sampler> sampler = GetSampler(instr.sampler, info); 144 const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
145 145
146 Node4 values; 146 Node4 values;
147 for (u32 element = 0; element < values.size(); ++element) { 147 for (u32 element = 0; element < values.size(); ++element) {
@@ -173,9 +173,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
173 SamplerInfo info; 173 SamplerInfo info;
174 info.type = texture_type; 174 info.type = texture_type;
175 info.is_array = is_array; 175 info.is_array = is_array;
176 const std::optional<Sampler> sampler = is_bindless 176 const std::optional<SamplerEntry> sampler =
177 ? GetBindlessSampler(base_reg, info, index_var) 177 is_bindless ? GetBindlessSampler(base_reg, info, index_var)
178 : GetSampler(instr.sampler, info); 178 : GetSampler(instr.sampler, info);
179 Node4 values; 179 Node4 values;
180 if (!sampler) { 180 if (!sampler) {
181 std::generate(values.begin(), values.end(), [this] { return Immediate(0); }); 181 std::generate(values.begin(), values.end(), [this] { return Immediate(0); });
@@ -217,9 +217,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
217 [[fallthrough]]; 217 [[fallthrough]];
218 case OpCode::Id::TXQ: { 218 case OpCode::Id::TXQ: {
219 Node index_var; 219 Node index_var;
220 const std::optional<Sampler> sampler = is_bindless 220 const std::optional<SamplerEntry> sampler =
221 ? GetBindlessSampler(instr.gpr8, {}, index_var) 221 is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var)
222 : GetSampler(instr.sampler, {}); 222 : GetSampler(instr.sampler, {});
223 223
224 if (!sampler) { 224 if (!sampler) {
225 u32 indexer = 0; 225 u32 indexer = 0;
@@ -272,7 +272,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
272 info.type = texture_type; 272 info.type = texture_type;
273 info.is_array = is_array; 273 info.is_array = is_array;
274 Node index_var; 274 Node index_var;
275 const std::optional<Sampler> sampler = 275 const std::optional<SamplerEntry> sampler =
276 is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var) 276 is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var)
277 : GetSampler(instr.sampler, info); 277 : GetSampler(instr.sampler, info);
278 278
@@ -379,14 +379,15 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
379 return info; 379 return info;
380} 380}
381 381
382std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, 382std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
383 SamplerInfo sampler_info) { 383 SamplerInfo sampler_info) {
384 const u32 offset = static_cast<u32>(sampler.index.Value()); 384 const u32 offset = static_cast<u32>(sampler.index.Value());
385 const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); 385 const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
386 386
387 // If this sampler has already been used, return the existing mapping. 387 // If this sampler has already been used, return the existing mapping.
388 const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), 388 const auto it =
389 [offset](const Sampler& entry) { return entry.offset == offset; }); 389 std::find_if(used_samplers.begin(), used_samplers.end(),
390 [offset](const SamplerEntry& entry) { return entry.offset == offset; });
390 if (it != used_samplers.end()) { 391 if (it != used_samplers.end()) {
391 ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && 392 ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
392 it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); 393 it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
@@ -399,8 +400,8 @@ std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
399 *info.is_shadow, *info.is_buffer, false); 400 *info.is_shadow, *info.is_buffer, false);
400} 401}
401 402
402std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, 403std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
403 Node& index_var) { 404 SamplerInfo info, Node& index_var) {
404 const Node sampler_register = GetRegister(reg); 405 const Node sampler_register = GetRegister(reg);
405 const auto [base_node, tracked_sampler_info] = 406 const auto [base_node, tracked_sampler_info] =
406 TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); 407 TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
@@ -416,7 +417,7 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
416 417
417 // If this sampler has already been used, return the existing mapping. 418 // If this sampler has already been used, return the existing mapping.
418 const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), 419 const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
419 [buffer, offset](const Sampler& entry) { 420 [buffer, offset](const SamplerEntry& entry) {
420 return entry.buffer == buffer && entry.offset == offset; 421 return entry.buffer == buffer && entry.offset == offset;
421 }); 422 });
422 if (it != used_samplers.end()) { 423 if (it != used_samplers.end()) {
@@ -436,11 +437,12 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
436 info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); 437 info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
437 438
438 // Try to use an already created sampler if it exists 439 // Try to use an already created sampler if it exists
439 const auto it = std::find_if( 440 const auto it =
440 used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) { 441 std::find_if(used_samplers.begin(), used_samplers.end(),
441 return offsets == std::pair{entry.offset, entry.secondary_offset} && 442 [indices, offsets](const SamplerEntry& entry) {
442 indices == std::pair{entry.buffer, entry.secondary_buffer}; 443 return offsets == std::pair{entry.offset, entry.secondary_offset} &&
443 }); 444 indices == std::pair{entry.buffer, entry.secondary_buffer};
445 });
444 if (it != used_samplers.end()) { 446 if (it != used_samplers.end()) {
445 ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && 447 ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
446 it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); 448 it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
@@ -460,7 +462,7 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
460 // If this sampler has already been used, return the existing mapping. 462 // If this sampler has already been used, return the existing mapping.
461 const auto it = std::find_if( 463 const auto it = std::find_if(
462 used_samplers.begin(), used_samplers.end(), 464 used_samplers.begin(), used_samplers.end(),
463 [base_offset](const Sampler& entry) { return entry.offset == base_offset; }); 465 [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; });
464 if (it != used_samplers.end()) { 466 if (it != used_samplers.end()) {
465 ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && 467 ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
466 it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer && 468 it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer &&
@@ -565,9 +567,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
565 info.is_buffer = false; 567 info.is_buffer = false;
566 568
567 Node index_var; 569 Node index_var;
568 const std::optional<Sampler> sampler = is_bindless 570 const std::optional<SamplerEntry> sampler =
569 ? GetBindlessSampler(*bindless_reg, info, index_var) 571 is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var)
570 : GetSampler(instr.sampler, info); 572 : GetSampler(instr.sampler, info);
571 if (!sampler) { 573 if (!sampler) {
572 return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)}; 574 return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)};
573 } 575 }
@@ -724,7 +726,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
724 info.is_shadow = depth_compare; 726 info.is_shadow = depth_compare;
725 727
726 Node index_var; 728 Node index_var;
727 const std::optional<Sampler> sampler = 729 const std::optional<SamplerEntry> sampler =
728 is_bindless ? GetBindlessSampler(parameter_register++, info, index_var) 730 is_bindless ? GetBindlessSampler(parameter_register++, info, index_var)
729 : GetSampler(instr.sampler, info); 731 : GetSampler(instr.sampler, info);
730 Node4 values; 732 Node4 values;
@@ -783,7 +785,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
783 // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; 785 // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
784 // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; 786 // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
785 787
786 const std::optional<Sampler> sampler = GetSampler(instr.sampler, {}); 788 const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {});
787 789
788 Node4 values; 790 Node4 values;
789 for (u32 element = 0; element < values.size(); ++element) { 791 for (u32 element = 0; element < values.size(); ++element) {
@@ -800,7 +802,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
800 info.type = texture_type; 802 info.type = texture_type;
801 info.is_array = is_array; 803 info.is_array = is_array;
802 info.is_shadow = false; 804 info.is_shadow = false;
803 const std::optional<Sampler> sampler = GetSampler(instr.sampler, info); 805 const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
804 806
805 const std::size_t type_coord_count = GetCoordCount(texture_type); 807 const std::size_t type_coord_count = GetCoordCount(texture_type);
806 const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; 808 const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
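Note: the separated-sampler branch above keys entries by two (offset, buffer) pairs and compares them with std::pair's operator==. A self-contained sketch (Entry is invented):

#include <algorithm>
#include <cstdint>
#include <list>
#include <utility>

struct Entry {
    uint32_t offset;
    uint32_t secondary_offset;
    uint32_t buffer;
    uint32_t secondary_buffer;
};

const Entry* FindSeparated(const std::list<Entry>& used, std::pair<uint32_t, uint32_t> offsets,
                           std::pair<uint32_t, uint32_t> buffers) {
    const auto it = std::find_if(used.begin(), used.end(), [&](const Entry& e) {
        return offsets == std::pair{e.offset, e.secondary_offset} &&
               buffers == std::pair{e.buffer, e.secondary_buffer};
    });
    return it != used.end() ? &*it : nullptr;
}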
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 8db9e1de7..b54d33763 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -282,25 +282,24 @@ struct SeparateSamplerNode;
282using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>; 282using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
283using TrackSampler = std::shared_ptr<TrackSamplerData>; 283using TrackSampler = std::shared_ptr<TrackSamplerData>;
284 284
285struct Sampler { 285struct SamplerEntry {
286 /// Bound samplers constructor 286 /// Bound samplers constructor
287 constexpr explicit Sampler(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, 287 explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_,
288 bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) 288 bool is_shadow_, bool is_buffer_, bool is_indexed_)
289 : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, 289 : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_},
290 is_buffer{is_buffer_}, is_indexed{is_indexed_} {} 290 is_buffer{is_buffer_}, is_indexed{is_indexed_} {}
291 291
292 /// Separate sampler constructor 292 /// Separate sampler constructor
293 constexpr explicit Sampler(u32 index_, std::pair<u32, u32> offsets_, 293 explicit SamplerEntry(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
294 std::pair<u32, u32> buffers_, Tegra::Shader::TextureType type_, 294 Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_,
295 bool is_array_, bool is_shadow_, bool is_buffer_) 295 bool is_buffer_)
296 : index{index_}, offset{offsets_.first}, secondary_offset{offsets_.second}, 296 : index{index_}, offset{offsets.first}, secondary_offset{offsets.second},
297 buffer{buffers_.first}, secondary_buffer{buffers_.second}, type{type_}, 297 buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_},
298 is_array{is_array_}, is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} 298 is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {}
299 299
300 /// Bindless samplers constructor 300 /// Bindless samplers constructor
301 constexpr explicit Sampler(u32 index_, u32 offset_, u32 buffer_, 301 explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_,
302 Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, 302 bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_)
303 bool is_buffer_, bool is_indexed_)
304 : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_}, 303 : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_},
305 is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} { 304 is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} {
306 } 305 }
@@ -340,14 +339,14 @@ struct BindlessSamplerNode {
340 u32 offset; 339 u32 offset;
341}; 340};
342 341
343struct Image { 342struct ImageEntry {
344public: 343public:
345 /// Bound images constructor 344 /// Bound images constructor
346 constexpr explicit Image(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) 345 explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_)
347 : index{index_}, offset{offset_}, type{type_} {} 346 : index{index_}, offset{offset_}, type{type_} {}
348 347
349 /// Bindless images constructor 348 /// Bindless images constructor
350 constexpr explicit Image(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) 349 explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_)
351 : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {} 350 : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {}
352 351
353 void MarkWrite() { 352 void MarkWrite() {
@@ -391,7 +390,7 @@ struct MetaArithmetic {
391 390
392/// Parameters describing a texture sampler 391/// Parameters describing a texture sampler
393struct MetaTexture { 392struct MetaTexture {
394 Sampler sampler; 393 SamplerEntry sampler;
395 Node array; 394 Node array;
396 Node depth_compare; 395 Node depth_compare;
397 std::vector<Node> aoffi; 396 std::vector<Node> aoffi;
@@ -405,7 +404,7 @@ struct MetaTexture {
405}; 404};
406 405
407struct MetaImage { 406struct MetaImage {
408 const Image& image; 407 const ImageEntry& image;
409 std::vector<Node> values; 408 std::vector<Node> values;
410 u32 element{}; 409 u32 element{};
411}; 410};
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 6aae14e34..0c6ab0f07 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -94,11 +94,11 @@ public:
94 return used_cbufs; 94 return used_cbufs;
95 } 95 }
96 96
97 const std::list<Sampler>& GetSamplers() const { 97 const std::list<SamplerEntry>& GetSamplers() const {
98 return used_samplers; 98 return used_samplers;
99 } 99 }
100 100
101 const std::list<Image>& GetImages() const { 101 const std::list<ImageEntry>& GetImages() const {
102 return used_images; 102 return used_images;
103 } 103 }
104 104
@@ -334,17 +334,17 @@ private:
334 std::optional<Tegra::Engines::SamplerDescriptor> sampler); 334 std::optional<Tegra::Engines::SamplerDescriptor> sampler);
335 335
336 /// Accesses a texture sampler. 336 /// Accesses a texture sampler.
337 std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); 337 std::optional<SamplerEntry> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
338 338
339 /// Accesses a texture sampler for a bindless texture. 339 /// Accesses a texture sampler for a bindless texture.
340 std::optional<Sampler> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, 340 std::optional<SamplerEntry> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info,
341 Node& index_var); 341 Node& index_var);
342 342
343 /// Accesses an image. 343 /// Accesses an image.
344 Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); 344 ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
345 345
346 /// Access a bindless image sampler. 346 /// Access a bindless image sampler.
347 Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); 347 ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
348 348
349 /// Extracts a sequence of bits from a node 349 /// Extracts a sequence of bits from a node
350 Node BitfieldExtract(Node value, u32 offset, u32 bits); 350 Node BitfieldExtract(Node value, u32 offset, u32 bits);
@@ -454,8 +454,8 @@ private:
454 std::set<Tegra::Shader::Attribute::Index> used_input_attributes; 454 std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
455 std::set<Tegra::Shader::Attribute::Index> used_output_attributes; 455 std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
456 std::map<u32, ConstBuffer> used_cbufs; 456 std::map<u32, ConstBuffer> used_cbufs;
457 std::list<Sampler> used_samplers; 457 std::list<SamplerEntry> used_samplers;
458 std::list<Image> used_images; 458 std::list<ImageEntry> used_images;
459 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; 459 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
460 std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; 460 std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
461 bool uses_layer{}; 461 bool uses_layer{};
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 937e29d1e..6308aef94 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -280,7 +280,7 @@ bool IsPixelFormatSRGB(PixelFormat format) {
280} 280}
281 281
282std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { 282std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
283 return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)}; 283 return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
284} 284}
285 285
286} // namespace VideoCore::Surface 286} // namespace VideoCore::Surface
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index cfd12fa61..c40ab89d0 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -120,7 +120,7 @@ enum class PixelFormat {
120 Max = MaxDepthStencilFormat, 120 Max = MaxDepthStencilFormat,
121 Invalid = 255, 121 Invalid = 255,
122}; 122};
123static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max); 123constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max);
124 124
125enum class SurfaceType { 125enum class SurfaceType {
126 ColorTexture = 0, 126 ColorTexture = 0,
@@ -140,117 +140,7 @@ enum class SurfaceTarget {
140 TextureCubeArray, 140 TextureCubeArray,
141}; 141};
142 142
143constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ 143constexpr std::array<u32, MaxPixelFormat> BLOCK_WIDTH_TABLE = {{
144 0, // A8B8G8R8_UNORM
145 0, // A8B8G8R8_SNORM
146 0, // A8B8G8R8_SINT
147 0, // A8B8G8R8_UINT
148 0, // R5G6B5_UNORM
149 0, // B5G6R5_UNORM
150 0, // A1R5G5B5_UNORM
151 0, // A2B10G10R10_UNORM
152 0, // A2B10G10R10_UINT
153 0, // A1B5G5R5_UNORM
154 0, // R8_UNORM
155 0, // R8_SNORM
156 0, // R8_SINT
157 0, // R8_UINT
158 0, // R16G16B16A16_FLOAT
159 0, // R16G16B16A16_UNORM
160 0, // R16G16B16A16_SNORM
161 0, // R16G16B16A16_SINT
162 0, // R16G16B16A16_UINT
163 0, // B10G11R11_FLOAT
164 0, // R32G32B32A32_UINT
165 2, // BC1_RGBA_UNORM
166 2, // BC2_UNORM
167 2, // BC3_UNORM
168 2, // BC4_UNORM
169 2, // BC4_SNORM
170 2, // BC5_UNORM
171 2, // BC5_SNORM
172 2, // BC7_UNORM
173 2, // BC6H_UFLOAT
174 2, // BC6H_SFLOAT
175 2, // ASTC_2D_4X4_UNORM
176 0, // B8G8R8A8_UNORM
177 0, // R32G32B32A32_FLOAT
178 0, // R32G32B32A32_SINT
179 0, // R32G32_FLOAT
180 0, // R32G32_SINT
181 0, // R32_FLOAT
182 0, // R16_FLOAT
183 0, // R16_UNORM
184 0, // R16_SNORM
185 0, // R16_UINT
186 0, // R16_SINT
187 0, // R16G16_UNORM
188 0, // R16G16_FLOAT
189 0, // R16G16_UINT
190 0, // R16G16_SINT
191 0, // R16G16_SNORM
192 0, // R32G32B32_FLOAT
193 0, // A8B8G8R8_SRGB
194 0, // R8G8_UNORM
195 0, // R8G8_SNORM
196 0, // R8G8_SINT
197 0, // R8G8_UINT
198 0, // R32G32_UINT
199 0, // R16G16B16X16_FLOAT
200 0, // R32_UINT
201 0, // R32_SINT
202 2, // ASTC_2D_8X8_UNORM
203 2, // ASTC_2D_8X5_UNORM
204 2, // ASTC_2D_5X4_UNORM
205 0, // B8G8R8A8_SRGB
206 2, // BC1_RGBA_SRGB
207 2, // BC2_SRGB
208 2, // BC3_SRGB
209 2, // BC7_SRGB
210 0, // A4B4G4R4_UNORM
211 2, // ASTC_2D_4X4_SRGB
212 2, // ASTC_2D_8X8_SRGB
213 2, // ASTC_2D_8X5_SRGB
214 2, // ASTC_2D_5X4_SRGB
215 2, // ASTC_2D_5X5_UNORM
216 2, // ASTC_2D_5X5_SRGB
217 2, // ASTC_2D_10X8_UNORM
218 2, // ASTC_2D_10X8_SRGB
219 2, // ASTC_2D_6X6_UNORM
220 2, // ASTC_2D_6X6_SRGB
221 2, // ASTC_2D_10X10_UNORM
222 2, // ASTC_2D_10X10_SRGB
223 2, // ASTC_2D_12X12_UNORM
224 2, // ASTC_2D_12X12_SRGB
225 2, // ASTC_2D_8X6_UNORM
226 2, // ASTC_2D_8X6_SRGB
227 2, // ASTC_2D_6X5_UNORM
228 2, // ASTC_2D_6X5_SRGB
229 0, // E5B9G9R9_FLOAT
230 0, // D32_FLOAT
231 0, // D16_UNORM
232 0, // D24_UNORM_S8_UINT
233 0, // S8_UINT_D24_UNORM
234 0, // D32_FLOAT_S8_UINT
235}};
236
237/**
238 * Gets the compression factor for the specified PixelFormat. This applies to just the
239 * "compressed width" and "compressed height", not the overall compression factor of a
240 * compressed image. This is used for maintaining proper surface sizes for compressed
241 * texture formats.
242 */
243inline constexpr u32 GetCompressionFactorShift(PixelFormat format) {
244 DEBUG_ASSERT(format != PixelFormat::Invalid);
245 DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_factor_shift_table.size());
246 return compression_factor_shift_table[static_cast<std::size_t>(format)];
247}
248
249inline constexpr u32 GetCompressionFactor(PixelFormat format) {
250 return 1U << GetCompressionFactorShift(format);
251}
252
253constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
254 1, // A8B8G8R8_UNORM 144 1, // A8B8G8R8_UNORM
255 1, // A8B8G8R8_SNORM 145 1, // A8B8G8R8_SNORM
256 1, // A8B8G8R8_SINT 146 1, // A8B8G8R8_SINT
@@ -344,15 +234,12 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
344 1, // D32_FLOAT_S8_UINT 234 1, // D32_FLOAT_S8_UINT
345}}; 235}};
346 236
347static constexpr u32 GetDefaultBlockWidth(PixelFormat format) { 237constexpr u32 DefaultBlockWidth(PixelFormat format) {
348 if (format == PixelFormat::Invalid) 238 ASSERT(static_cast<std::size_t>(format) < BLOCK_WIDTH_TABLE.size());
349 return 0; 239 return BLOCK_WIDTH_TABLE[static_cast<std::size_t>(format)];
350
351 ASSERT(static_cast<std::size_t>(format) < block_width_table.size());
352 return block_width_table[static_cast<std::size_t>(format)];
353} 240}
354 241
355constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ 242constexpr std::array<u32, MaxPixelFormat> BLOCK_HEIGHT_TABLE = {{
356 1, // A8B8G8R8_UNORM 243 1, // A8B8G8R8_UNORM
357 1, // A8B8G8R8_SNORM 244 1, // A8B8G8R8_SNORM
358 1, // A8B8G8R8_SINT 245 1, // A8B8G8R8_SINT
@@ -446,15 +333,12 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
446 1, // D32_FLOAT_S8_UINT 333 1, // D32_FLOAT_S8_UINT
447}}; 334}};
448 335
449static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { 336constexpr u32 DefaultBlockHeight(PixelFormat format) {
450 if (format == PixelFormat::Invalid) 337 ASSERT(static_cast<std::size_t>(format) < BLOCK_HEIGHT_TABLE.size());
451 return 0; 338 return BLOCK_HEIGHT_TABLE[static_cast<std::size_t>(format)];
452
453 ASSERT(static_cast<std::size_t>(format) < block_height_table.size());
454 return block_height_table[static_cast<std::size_t>(format)];
455} 339}
456 340
457constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ 341constexpr std::array<u32, MaxPixelFormat> BITS_PER_BLOCK_TABLE = {{
458 32, // A8B8G8R8_UNORM 342 32, // A8B8G8R8_UNORM
459 32, // A8B8G8R8_SNORM 343 32, // A8B8G8R8_SNORM
460 32, // A8B8G8R8_SINT 344 32, // A8B8G8R8_SINT
@@ -548,20 +432,14 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
548 64, // D32_FLOAT_S8_UINT 432 64, // D32_FLOAT_S8_UINT
549}}; 433}};
550 434
551static constexpr u32 GetFormatBpp(PixelFormat format) { 435constexpr u32 BitsPerBlock(PixelFormat format) {
552 if (format == PixelFormat::Invalid) 436 ASSERT(static_cast<std::size_t>(format) < BITS_PER_BLOCK_TABLE.size());
553 return 0; 437 return BITS_PER_BLOCK_TABLE[static_cast<std::size_t>(format)];
554
555 ASSERT(static_cast<std::size_t>(format) < bpp_table.size());
556 return bpp_table[static_cast<std::size_t>(format)];
557} 438}
558 439
559/// Returns the size in bytes of the specified pixel format 440/// Returns the size in bytes of the specified pixel format
560static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) { 441constexpr u32 BytesPerBlock(PixelFormat pixel_format) {
561 if (pixel_format == PixelFormat::Invalid) { 442 return BitsPerBlock(pixel_format) / CHAR_BIT;
562 return 0;
563 }
564 return GetFormatBpp(pixel_format) / CHAR_BIT;
565} 443}
566 444
567SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); 445SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type);
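Note: with the renamed helpers, a row pitch computes as blocks-per-row times BytesPerBlock. A worked check using BC1's 4x4 blocks at 64 bits per block (values consistent with the tables above):

#include <climits>
#include <cstdint>

constexpr uint32_t DivCeil(uint32_t n, uint32_t d) {
    return (n + d - 1) / d;
}

constexpr uint32_t PitchBytes(uint32_t width, uint32_t block_width, uint32_t bits_per_block) {
    return DivCeil(width, block_width) * (bits_per_block / CHAR_BIT);
}

// 256 texels wide in BC1: 256 / 4 = 64 blocks of 8 bytes = 512 bytes per row.
static_assert(PitchBytes(256, 4, 64) == 512);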
diff --git a/src/video_core/texture_cache/accelerated_swizzle.cpp b/src/video_core/texture_cache/accelerated_swizzle.cpp
new file mode 100644
index 000000000..a4fc1184b
--- /dev/null
+++ b/src/video_core/texture_cache/accelerated_swizzle.cpp
@@ -0,0 +1,70 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <bit>
7
8#include "common/alignment.h"
9#include "common/common_types.h"
10#include "common/div_ceil.h"
11#include "video_core/surface.h"
12#include "video_core/texture_cache/accelerated_swizzle.h"
13#include "video_core/texture_cache/util.h"
14#include "video_core/textures/decoders.h"
15
16namespace VideoCommon::Accelerated {
17
18using Tegra::Texture::GOB_SIZE_SHIFT;
19using Tegra::Texture::GOB_SIZE_X;
20using Tegra::Texture::GOB_SIZE_X_SHIFT;
21using Tegra::Texture::GOB_SIZE_Y_SHIFT;
22using VideoCore::Surface::BytesPerBlock;
23
24BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(const SwizzleParameters& swizzle,
25 const ImageInfo& info) {
26 const Extent3D block = swizzle.block;
27 const Extent3D num_tiles = swizzle.num_tiles;
28 const u32 bytes_per_block = BytesPerBlock(info.format);
29 const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level);
30 const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;
31 const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
32 return BlockLinearSwizzle2DParams{
33 .origin{0, 0, 0},
34 .destination{0, 0, 0},
35 .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)),
36 .layer_stride = info.layer_stride,
37 .block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth),
38 .x_shift = GOB_SIZE_SHIFT + block.height + block.depth,
39 .block_height = block.height,
40 .block_height_mask = (1U << block.height) - 1,
41 };
42}
43
44BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(const SwizzleParameters& swizzle,
45 const ImageInfo& info) {
46 const Extent3D block = swizzle.block;
47 const Extent3D num_tiles = swizzle.num_tiles;
48 const u32 bytes_per_block = BytesPerBlock(info.format);
49 const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level);
50 const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;
51
52 const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT;
53 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth);
54 const u32 slice_size =
55 Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size;
56 return BlockLinearSwizzle3DParams{
57 .origin{0, 0, 0},
58 .destination{0, 0, 0},
59 .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)),
60 .slice_size = slice_size,
61 .block_size = block_size,
62 .x_shift = GOB_SIZE_SHIFT + block.height + block.depth,
63 .block_height = block.height,
64 .block_height_mask = (1U << block.height) - 1,
65 .block_depth = block.depth,
66 .block_depth_mask = (1U << block.depth) - 1,
67 };
68}
69
70} // namespace VideoCommon::Accelerated \ No newline at end of file
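Note: worked numbers for the parameters built above, assuming the Tegra GOB constants (a GOB is 64 bytes wide and 512 bytes total, so GOB_SIZE_X_SHIFT = 6 and GOB_SIZE_SHIFT = 9):

#include <cstdint>

constexpr uint32_t GOB_SIZE_X = 64;
constexpr uint32_t GOB_SIZE_X_SHIFT = 6;
constexpr uint32_t GOB_SIZE_SHIFT = 9;

constexpr uint32_t GobsInX(uint32_t stride_bytes) {
    return (stride_bytes + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT;
}

constexpr uint32_t BlockSizeBytes(uint32_t stride_bytes, uint32_t block_height_log2,
                                  uint32_t block_depth_log2) {
    // One block spans GobsInX(stride) GOBs horizontally and
    // 2^block_height_log2 * 2^block_depth_log2 GOBs in Y/Z.
    return GobsInX(stride_bytes) << (GOB_SIZE_SHIFT + block_height_log2 + block_depth_log2);
}

// 512-byte stride, 16-GOB-high blocks (log2 = 4), no depth blocking:
// 8 GOBs across * 512 bytes * 16 = 65536 bytes per block row.
static_assert(BlockSizeBytes(512, 4, 0) == 65536);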
diff --git a/src/video_core/texture_cache/accelerated_swizzle.h b/src/video_core/texture_cache/accelerated_swizzle.h
new file mode 100644
index 000000000..6ec5c78c4
--- /dev/null
+++ b/src/video_core/texture_cache/accelerated_swizzle.h
@@ -0,0 +1,45 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8
9#include "common/common_types.h"
10#include "video_core/texture_cache/image_info.h"
11#include "video_core/texture_cache/types.h"
12
13namespace VideoCommon::Accelerated {
14
15struct BlockLinearSwizzle2DParams {
16 std::array<u32, 3> origin;
17 std::array<s32, 3> destination;
18 u32 bytes_per_block_log2;
19 u32 layer_stride;
20 u32 block_size;
21 u32 x_shift;
22 u32 block_height;
23 u32 block_height_mask;
24};
25
26struct BlockLinearSwizzle3DParams {
27 std::array<u32, 3> origin;
28 std::array<s32, 3> destination;
29 u32 bytes_per_block_log2;
30 u32 slice_size;
31 u32 block_size;
32 u32 x_shift;
33 u32 block_height;
34 u32 block_height_mask;
35 u32 block_depth;
36 u32 block_depth_mask;
37};
38
39[[nodiscard]] BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(
40 const SwizzleParameters& swizzle, const ImageInfo& info);
41
42[[nodiscard]] BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(
43 const SwizzleParameters& swizzle, const ImageInfo& info);
44
45} // namespace VideoCommon::Accelerated
diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h
deleted file mode 100644
index 5b475fe06..000000000
--- a/src/video_core/texture_cache/copy_params.h
+++ /dev/null
@@ -1,36 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace VideoCommon {
10
11struct CopyParams {
12 constexpr CopyParams(u32 source_x_, u32 source_y_, u32 source_z_, u32 dest_x_, u32 dest_y_,
13 u32 dest_z_, u32 source_level_, u32 dest_level_, u32 width_, u32 height_,
14 u32 depth_)
15 : source_x{source_x_}, source_y{source_y_}, source_z{source_z_}, dest_x{dest_x_},
16 dest_y{dest_y_}, dest_z{dest_z_}, source_level{source_level_},
17 dest_level{dest_level_}, width{width_}, height{height_}, depth{depth_} {}
18
19 constexpr CopyParams(u32 width_, u32 height_, u32 depth_, u32 level_)
20 : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level_},
21 dest_level{level_}, width{width_}, height{height_}, depth{depth_} {}
22
23 u32 source_x;
24 u32 source_y;
25 u32 source_z;
26 u32 dest_x;
27 u32 dest_y;
28 u32 dest_z;
29 u32 source_level;
30 u32 dest_level;
31 u32 width;
32 u32 height;
33 u32 depth;
34};
35
36} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/decode_bc4.cpp b/src/video_core/texture_cache/decode_bc4.cpp
new file mode 100644
index 000000000..017327975
--- /dev/null
+++ b/src/video_core/texture_cache/decode_bc4.cpp
@@ -0,0 +1,97 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <span>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/texture_cache/decode_bc4.h"
12#include "video_core/texture_cache/types.h"
13
14namespace VideoCommon {
15
16// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt
17[[nodiscard]] constexpr u32 DecompressBlock(u64 bits, u32 x, u32 y) {
18 const u32 code_offset = 16 + 3 * (4 * y + x);
19 const u32 code = (bits >> code_offset) & 7;
20 const u32 red0 = (bits >> 0) & 0xff;
21 const u32 red1 = (bits >> 8) & 0xff;
22 if (red0 > red1) {
23 switch (code) {
24 case 0:
25 return red0;
26 case 1:
27 return red1;
28 case 2:
29 return (6 * red0 + 1 * red1) / 7;
30 case 3:
31 return (5 * red0 + 2 * red1) / 7;
32 case 4:
33 return (4 * red0 + 3 * red1) / 7;
34 case 5:
35 return (3 * red0 + 4 * red1) / 7;
36 case 6:
37 return (2 * red0 + 5 * red1) / 7;
38 case 7:
39 return (1 * red0 + 6 * red1) / 7;
40 }
41 } else {
42 switch (code) {
43 case 0:
44 return red0;
45 case 1:
46 return red1;
47 case 2:
48 return (4 * red0 + 1 * red1) / 5;
49 case 3:
50 return (3 * red0 + 2 * red1) / 5;
51 case 4:
52 return (2 * red0 + 3 * red1) / 5;
53 case 5:
54 return (1 * red0 + 4 * red1) / 5;
55 case 6:
56 return 0;
57 case 7:
58 return 0xff;
59 }
60 }
61 return 0;
62}
63
64void DecompressBC4(std::span<const u8> input, Extent3D extent, std::span<u8> output) {
65 UNIMPLEMENTED_IF_MSG(extent.width % 4 != 0, "Unaligned width={}", extent.width);
66 UNIMPLEMENTED_IF_MSG(extent.height % 4 != 0, "Unaligned height={}", extent.height);
67 static constexpr u32 BLOCK_SIZE = 4;
68 size_t input_offset = 0;
69 for (u32 slice = 0; slice < extent.depth; ++slice) {
70 for (u32 block_y = 0; block_y < extent.height / 4; ++block_y) {
71 for (u32 block_x = 0; block_x < extent.width / 4; ++block_x) {
72 u64 bits;
73 std::memcpy(&bits, &input[input_offset], sizeof(bits));
74 input_offset += sizeof(bits);
75
76 for (u32 y = 0; y < BLOCK_SIZE; ++y) {
77 for (u32 x = 0; x < BLOCK_SIZE; ++x) {
78 const u32 linear_z = slice;
79 const u32 linear_y = block_y * BLOCK_SIZE + y;
80 const u32 linear_x = block_x * BLOCK_SIZE + x;
81 const u32 offset_z = linear_z * extent.width * extent.height;
82 const u32 offset_y = linear_y * extent.width;
83 const u32 offset_x = linear_x;
84 const u32 output_offset = (offset_z + offset_y + offset_x) * 4ULL;
85 const u32 color = DecompressBlock(bits, x, y);
86 output[output_offset + 0] = static_cast<u8>(color);
87 output[output_offset + 1] = 0;
88 output[output_offset + 2] = 0;
89 output[output_offset + 3] = 0xff;
90 }
91 }
92 }
93 }
94 }
95}
96
97} // namespace VideoCommon
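Note: a self-contained restatement of the BC4 palette rules the switch above implements (per ARB_texture_compression_rgtc): eight interpolated values when red0 > red1, otherwise six plus hard 0 and 0xff endpoints.

#include <cstdint>

constexpr uint32_t Palette(uint32_t red0, uint32_t red1, uint32_t code) {
    if (red0 > red1) {
        if (code == 0) return red0;
        if (code == 1) return red1;
        return ((8 - code) * red0 + (code - 1) * red1) / 7; // codes 2..7
    }
    if (code == 0) return red0;
    if (code == 1) return red1;
    if (code == 6) return 0;
    if (code == 7) return 0xff;
    return ((6 - code) * red0 + (code - 1) * red1) / 5; // codes 2..5
}

// code 2 with red0 > red1 is (6*red0 + 1*red1) / 7, matching the switch above.
static_assert(Palette(70, 0, 2) == 60);
static_assert(Palette(0, 255, 7) == 0xff);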
diff --git a/src/video_core/texture_cache/decode_bc4.h b/src/video_core/texture_cache/decode_bc4.h
new file mode 100644
index 000000000..63fb23508
--- /dev/null
+++ b/src/video_core/texture_cache/decode_bc4.h
@@ -0,0 +1,16 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <span>
8
9#include "common/common_types.h"
10#include "video_core/texture_cache/types.h"
11
12namespace VideoCommon {
13
14void DecompressBC4(std::span<const u8> data, Extent3D extent, std::span<u8> output);
15
16} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h
new file mode 100644
index 000000000..3a03b786f
--- /dev/null
+++ b/src/video_core/texture_cache/descriptor_table.h
@@ -0,0 +1,82 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <vector>
9
10#include "common/common_types.h"
11#include "common/div_ceil.h"
12#include "common/logging/log.h"
13#include "video_core/memory_manager.h"
14#include "video_core/rasterizer_interface.h"
15
16namespace VideoCommon {
17
18template <typename Descriptor>
19class DescriptorTable {
20public:
21 explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {}
22
23 [[nodiscard]] bool Synchronize(GPUVAddr gpu_addr, u32 limit) {
24 if (current_gpu_addr == gpu_addr && current_limit == limit) [[likely]] {
25 return false;
26 }
27 Refresh(gpu_addr, limit);
28 return true;
29 }
30
31 void Invalidate() noexcept {
32 std::ranges::fill(read_descriptors, 0);
33 }
34
35 [[nodiscard]] std::pair<Descriptor, bool> Read(u32 index) {
36 DEBUG_ASSERT(index <= current_limit);
37 const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(Descriptor);
38 std::pair<Descriptor, bool> result;
39 gpu_memory.ReadBlockUnsafe(gpu_addr, &result.first, sizeof(Descriptor));
40 if (IsDescriptorRead(index)) {
41 result.second = result.first != descriptors[index];
42 } else {
43 MarkDescriptorAsRead(index);
44 result.second = true;
45 }
46 if (result.second) {
47 descriptors[index] = result.first;
48 }
49 return result;
50 }
51
52 [[nodiscard]] u32 Limit() const noexcept {
53 return current_limit;
54 }
55
56private:
57 void Refresh(GPUVAddr gpu_addr, u32 limit) {
58 current_gpu_addr = gpu_addr;
59 current_limit = limit;
60
61 const size_t num_descriptors = static_cast<size_t>(limit) + 1;
62 read_descriptors.clear();
63 read_descriptors.resize(Common::DivCeil(num_descriptors, 64U), 0);
64 descriptors.resize(num_descriptors);
65 }
66
67 void MarkDescriptorAsRead(u32 index) noexcept {
68 read_descriptors[index / 64] |= 1ULL << (index % 64);
69 }
70
71 [[nodiscard]] bool IsDescriptorRead(u32 index) const noexcept {
72 return (read_descriptors[index / 64] & (1ULL << (index % 64))) != 0;
73 }
74
75 Tegra::MemoryManager& gpu_memory;
76 GPUVAddr current_gpu_addr{};
77 u32 current_limit{};
78 std::vector<u64> read_descriptors;
79 std::vector<Descriptor> descriptors;
80};
81
82} // namespace VideoCommon
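Note: read_descriptors above is a flat bitset, one bit per descriptor packed 64 to a u64, letting Read() distinguish a first read (always treated as changed) from a re-read that only dirties when the bytes differ. A minimal sketch of that bit arithmetic (ReadBits is an invented name):

#include <cstdint>
#include <vector>

class ReadBits {
public:
    explicit ReadBits(size_t count) : words((count + 63) / 64, 0) {}

    void Mark(uint32_t index) {
        words[index / 64] |= uint64_t{1} << (index % 64);
    }

    bool IsSet(uint32_t index) const {
        return (words[index / 64] & (uint64_t{1} << (index % 64))) != 0;
    }

private:
    std::vector<uint64_t> words; // one bit per descriptor, as in DescriptorTable
};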
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 7938d71eb..ddfb726fe 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -2,7 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
6#include "common/common_types.h" 5#include "common/common_types.h"
7#include "common/logging/log.h" 6#include "common/logging/log.h"
8#include "video_core/texture_cache/format_lookup_table.h" 7#include "video_core/texture_cache/format_lookup_table.h"
@@ -20,198 +19,207 @@ constexpr auto UNORM = ComponentType::UNORM;
20constexpr auto SINT = ComponentType::SINT; 19constexpr auto SINT = ComponentType::SINT;
21constexpr auto UINT = ComponentType::UINT; 20constexpr auto UINT = ComponentType::UINT;
22constexpr auto FLOAT = ComponentType::FLOAT; 21constexpr auto FLOAT = ComponentType::FLOAT;
23constexpr bool C = false; // Normal color 22constexpr bool LINEAR = false;
24constexpr bool S = true; // Srgb 23constexpr bool SRGB = true;
25 24
26struct Table { 25constexpr u32 Hash(TextureFormat format, ComponentType red_component, ComponentType green_component,
27 constexpr Table(TextureFormat texture_format_, bool is_srgb_, ComponentType red_component_, 26 ComponentType blue_component, ComponentType alpha_component, bool is_srgb) {
28 ComponentType green_component_, ComponentType blue_component_, 27 u32 hash = is_srgb ? 1 : 0;
29 ComponentType alpha_component_, PixelFormat pixel_format_) 28 hash |= static_cast<u32>(red_component) << 1;
30 : texture_format{texture_format_}, pixel_format{pixel_format_}, 29 hash |= static_cast<u32>(green_component) << 4;
31 red_component{red_component_}, green_component{green_component_}, 30 hash |= static_cast<u32>(blue_component) << 7;
32 blue_component{blue_component_}, alpha_component{alpha_component_}, is_srgb{is_srgb_} {} 31 hash |= static_cast<u32>(alpha_component) << 10;
33 32 hash |= static_cast<u32>(format) << 13;
34 TextureFormat texture_format; 33 return hash;
35 PixelFormat pixel_format; 34}
36 ComponentType red_component;
37 ComponentType green_component;
38 ComponentType blue_component;
39 ComponentType alpha_component;
40 bool is_srgb;
41};
42constexpr std::array<Table, 86> DefinitionTable = {{
43 {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_UNORM},
44 {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::A8B8G8R8_SNORM},
45 {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::A8B8G8R8_UINT},
46 {TextureFormat::A8R8G8B8, C, SINT, SINT, SINT, SINT, PixelFormat::A8B8G8R8_SINT},
47 {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_SRGB},
48
49 {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5_UNORM},
50
51 {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10_UNORM},
52 {TextureFormat::A2B10G10R10, C, UINT, UINT, UINT, UINT, PixelFormat::A2B10G10R10_UINT},
53
54 {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5_UNORM},
55
56 {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A4B4G4R4_UNORM},
57
58 {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8_UNORM},
59 {TextureFormat::R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8_SNORM},
60 {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8_UINT},
61 {TextureFormat::R8, C, SINT, SINT, SINT, SINT, PixelFormat::R8_SINT},
62
63 {TextureFormat::R8G8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8G8_UNORM},
64 {TextureFormat::R8G8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8G8_SNORM},
65 {TextureFormat::R8G8, C, UINT, UINT, UINT, UINT, PixelFormat::R8G8_UINT},
66 {TextureFormat::R8G8, C, SINT, SINT, SINT, SINT, PixelFormat::R8G8_SINT},
67
68 {TextureFormat::R16G16B16A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16B16A16_SNORM},
69 {TextureFormat::R16G16B16A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16B16A16_UNORM},
70 {TextureFormat::R16G16B16A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16B16A16_FLOAT},
71 {TextureFormat::R16G16B16A16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16B16A16_UINT},
72 {TextureFormat::R16G16B16A16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16B16A16_SINT},
73
74 {TextureFormat::R16G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16_FLOAT},
75 {TextureFormat::R16G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16_UNORM},
76 {TextureFormat::R16G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16_SNORM},
77 {TextureFormat::R16G16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16_UINT},
78 {TextureFormat::R16G16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16_SINT},
79
80 {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16_FLOAT},
81 {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16_UNORM},
82 {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16_SNORM},
83 {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16_UINT},
84 {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16_SINT},
85
86 {TextureFormat::B10G11R11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::B10G11R11_FLOAT},
87
88 {TextureFormat::R32G32B32A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32A32_FLOAT},
89 {TextureFormat::R32G32B32A32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32B32A32_UINT},
90 {TextureFormat::R32G32B32A32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32B32A32_SINT},
91
92 {TextureFormat::R32G32B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32_FLOAT},
93
94 {TextureFormat::R32G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32_FLOAT},
95 {TextureFormat::R32G32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32_UINT},
96 {TextureFormat::R32G32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32_SINT},
97
98 {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32_FLOAT},
99 {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32_UINT},
100 {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32_SINT},
101
102 {TextureFormat::E5B9G9R9, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9_FLOAT},
103
104 {TextureFormat::D32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::D32_FLOAT},
105 {TextureFormat::D16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::D16_UNORM},
106 {TextureFormat::S8D24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM},
107 {TextureFormat::R8G24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM},
108 {TextureFormat::D32S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::D32_FLOAT_S8_UINT},
109
110 {TextureFormat::BC1_RGBA, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_UNORM},
111 {TextureFormat::BC1_RGBA, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_SRGB},
112
113 {TextureFormat::BC2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_UNORM},
114 {TextureFormat::BC2, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_SRGB},
115
116 {TextureFormat::BC3, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_UNORM},
117 {TextureFormat::BC3, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_SRGB},
118
119 {TextureFormat::BC4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC4_UNORM},
120 {TextureFormat::BC4, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC4_SNORM},
121
122 {TextureFormat::BC5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC5_UNORM},
123 {TextureFormat::BC5, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC5_SNORM},
124
125 {TextureFormat::BC7, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_UNORM},
126 {TextureFormat::BC7, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_SRGB},
127
128 {TextureFormat::BC6H_SFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SFLOAT},
129 {TextureFormat::BC6H_UFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UFLOAT},
130
131 {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_UNORM},
132 {TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB},
133
134 {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_UNORM},
135 {TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB},
136
137 {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_UNORM},
138 {TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB},
139
140 {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_UNORM},
141 {TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB},
142
143 {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_UNORM},
144 {TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB},
145
146 {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_UNORM},
147 {TextureFormat::ASTC_2D_10X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB},
148
149 {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_UNORM},
150 {TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB},
151
152 {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_UNORM},
153 {TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB},
154
155 {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_UNORM},
156 {TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB},
157
158 {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_UNORM},
159 {TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB},
160 35
161 {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_UNORM}, 36constexpr u32 Hash(TextureFormat format, ComponentType component, bool is_srgb = LINEAR) {
162 {TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_SRGB}, 37 return Hash(format, component, component, component, component, is_srgb);
163}}; 38}
164 39
165} // Anonymous namespace 40} // Anonymous namespace
166 41
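Note: the switch below is only legal because Hash is injective over the fields' value ranges (assumption: ComponentType needs at most 3 bits, so the shifts of 1/4/7/10/13 never overlap; the enum values used here are illustrative). A compile-time restatement with invented names:

#include <cstdint>

enum class Comp : uint32_t { SNORM = 1, UNORM = 2, SINT = 3, UINT = 4, FLOAT = 7 };

constexpr uint32_t Pack(uint32_t format, Comp r, Comp g, Comp b, Comp a, bool srgb) {
    uint32_t hash = srgb ? 1 : 0;
    hash |= static_cast<uint32_t>(r) << 1;
    hash |= static_cast<uint32_t>(g) << 4;
    hash |= static_cast<uint32_t>(b) << 7;
    hash |= static_cast<uint32_t>(a) << 10;
    hash |= format << 13;
    return hash;
}

// Distinct inputs produce distinct case labels, so the switch stays well-formed:
static_assert(Pack(1, Comp::UNORM, Comp::UNORM, Comp::UNORM, Comp::UNORM, false) !=
              Pack(1, Comp::UNORM, Comp::UNORM, Comp::UNORM, Comp::UNORM, true));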
167FormatLookupTable::FormatLookupTable() { 42PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, ComponentType green,
168 table.fill(static_cast<u8>(PixelFormat::Invalid)); 43 ComponentType blue, ComponentType alpha,
169 44 bool is_srgb) noexcept {
170 for (const auto& entry : DefinitionTable) { 45 switch (Hash(format, red, green, blue, alpha, is_srgb)) {
171 table[CalculateIndex(entry.texture_format, entry.is_srgb != 0, entry.red_component, 46 case Hash(TextureFormat::A8R8G8B8, UNORM):
172 entry.green_component, entry.blue_component, entry.alpha_component)] = 47 return PixelFormat::A8B8G8R8_UNORM;
173 static_cast<u8>(entry.pixel_format); 48 case Hash(TextureFormat::A8R8G8B8, SNORM):
174 } 49 return PixelFormat::A8B8G8R8_SNORM;
175} 50 case Hash(TextureFormat::A8R8G8B8, UINT):
176 51 return PixelFormat::A8B8G8R8_UINT;
177PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb, 52 case Hash(TextureFormat::A8R8G8B8, SINT):
178 ComponentType red_component, 53 return PixelFormat::A8B8G8R8_SINT;
179 ComponentType green_component, 54 case Hash(TextureFormat::A8R8G8B8, UNORM, SRGB):
180 ComponentType blue_component, 55 return PixelFormat::A8B8G8R8_SRGB;
181 ComponentType alpha_component) const noexcept { 56 case Hash(TextureFormat::B5G6R5, UNORM):
182 const auto pixel_format = static_cast<PixelFormat>(table[CalculateIndex( 57 return PixelFormat::B5G6R5_UNORM;
183 format, is_srgb, red_component, green_component, blue_component, alpha_component)]); 58 case Hash(TextureFormat::A2B10G10R10, UNORM):
184 // [[likely]] 59 return PixelFormat::A2B10G10R10_UNORM;
185 if (pixel_format != PixelFormat::Invalid) { 60 case Hash(TextureFormat::A2B10G10R10, UINT):
186 return pixel_format; 61 return PixelFormat::A2B10G10R10_UINT;
62 case Hash(TextureFormat::A1B5G5R5, UNORM):
63 return PixelFormat::A1B5G5R5_UNORM;
64 case Hash(TextureFormat::A4B4G4R4, UNORM):
65 return PixelFormat::A4B4G4R4_UNORM;
66 case Hash(TextureFormat::R8, UNORM):
67 return PixelFormat::R8_UNORM;
68 case Hash(TextureFormat::R8, SNORM):
69 return PixelFormat::R8_SNORM;
70 case Hash(TextureFormat::R8, UINT):
71 return PixelFormat::R8_UINT;
72 case Hash(TextureFormat::R8, SINT):
73 return PixelFormat::R8_SINT;
74 case Hash(TextureFormat::R8G8, UNORM):
75 return PixelFormat::R8G8_UNORM;
76 case Hash(TextureFormat::R8G8, SNORM):
77 return PixelFormat::R8G8_SNORM;
78 case Hash(TextureFormat::R8G8, UINT):
79 return PixelFormat::R8G8_UINT;
80 case Hash(TextureFormat::R8G8, SINT):
81 return PixelFormat::R8G8_SINT;
82 case Hash(TextureFormat::R16G16B16A16, FLOAT):
83 return PixelFormat::R16G16B16A16_FLOAT;
84 case Hash(TextureFormat::R16G16B16A16, UNORM):
85 return PixelFormat::R16G16B16A16_UNORM;
86 case Hash(TextureFormat::R16G16B16A16, SNORM):
87 return PixelFormat::R16G16B16A16_SNORM;
88 case Hash(TextureFormat::R16G16B16A16, UINT):
89 return PixelFormat::R16G16B16A16_UINT;
90 case Hash(TextureFormat::R16G16B16A16, SINT):
91 return PixelFormat::R16G16B16A16_SINT;
92 case Hash(TextureFormat::R16G16, FLOAT):
93 return PixelFormat::R16G16_FLOAT;
94 case Hash(TextureFormat::R16G16, UNORM):
95 return PixelFormat::R16G16_UNORM;
96 case Hash(TextureFormat::R16G16, SNORM):
97 return PixelFormat::R16G16_SNORM;
98 case Hash(TextureFormat::R16G16, UINT):
99 return PixelFormat::R16G16_UINT;
100 case Hash(TextureFormat::R16G16, SINT):
101 return PixelFormat::R16G16_SINT;
102 case Hash(TextureFormat::R16, FLOAT):
103 return PixelFormat::R16_FLOAT;
104 case Hash(TextureFormat::R16, UNORM):
105 return PixelFormat::R16_UNORM;
106 case Hash(TextureFormat::R16, SNORM):
107 return PixelFormat::R16_SNORM;
108 case Hash(TextureFormat::R16, UINT):
109 return PixelFormat::R16_UINT;
110 case Hash(TextureFormat::R16, SINT):
111 return PixelFormat::R16_SINT;
112 case Hash(TextureFormat::B10G11R11, FLOAT):
113 return PixelFormat::B10G11R11_FLOAT;
114 case Hash(TextureFormat::R32G32B32A32, FLOAT):
115 return PixelFormat::R32G32B32A32_FLOAT;
116 case Hash(TextureFormat::R32G32B32A32, UINT):
117 return PixelFormat::R32G32B32A32_UINT;
118 case Hash(TextureFormat::R32G32B32A32, SINT):
119 return PixelFormat::R32G32B32A32_SINT;
120 case Hash(TextureFormat::R32G32B32, FLOAT):
121 return PixelFormat::R32G32B32_FLOAT;
122 case Hash(TextureFormat::R32G32, FLOAT):
123 return PixelFormat::R32G32_FLOAT;
124 case Hash(TextureFormat::R32G32, UINT):
125 return PixelFormat::R32G32_UINT;
126 case Hash(TextureFormat::R32G32, SINT):
127 return PixelFormat::R32G32_SINT;
128 case Hash(TextureFormat::R32, FLOAT):
129 return PixelFormat::R32_FLOAT;
130 case Hash(TextureFormat::R32, UINT):
131 return PixelFormat::R32_UINT;
132 case Hash(TextureFormat::R32, SINT):
133 return PixelFormat::R32_SINT;
134 case Hash(TextureFormat::E5B9G9R9, FLOAT):
135 return PixelFormat::E5B9G9R9_FLOAT;
136 case Hash(TextureFormat::D32, FLOAT):
137 return PixelFormat::D32_FLOAT;
138 case Hash(TextureFormat::D16, UNORM):
139 return PixelFormat::D16_UNORM;
140 case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR):
141 return PixelFormat::S8_UINT_D24_UNORM;
142 case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR):
143 return PixelFormat::S8_UINT_D24_UNORM;
144 case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR):
145 return PixelFormat::D32_FLOAT_S8_UINT;
146 case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR):
147 return PixelFormat::BC1_RGBA_UNORM;
148 case Hash(TextureFormat::BC1_RGBA, UNORM, SRGB):
149 return PixelFormat::BC1_RGBA_SRGB;
150 case Hash(TextureFormat::BC2, UNORM, LINEAR):
151 return PixelFormat::BC2_UNORM;
152 case Hash(TextureFormat::BC2, UNORM, SRGB):
153 return PixelFormat::BC2_SRGB;
154 case Hash(TextureFormat::BC3, UNORM, LINEAR):
155 return PixelFormat::BC3_UNORM;
156 case Hash(TextureFormat::BC3, UNORM, SRGB):
157 return PixelFormat::BC3_SRGB;
158 case Hash(TextureFormat::BC4, UNORM):
159 return PixelFormat::BC4_UNORM;
160 case Hash(TextureFormat::BC4, SNORM):
161 return PixelFormat::BC4_SNORM;
162 case Hash(TextureFormat::BC5, UNORM):
163 return PixelFormat::BC5_UNORM;
164 case Hash(TextureFormat::BC5, SNORM):
165 return PixelFormat::BC5_SNORM;
166 case Hash(TextureFormat::BC7, UNORM, LINEAR):
167 return PixelFormat::BC7_UNORM;
168 case Hash(TextureFormat::BC7, UNORM, SRGB):
169 return PixelFormat::BC7_SRGB;
170 case Hash(TextureFormat::BC6H_SFLOAT, FLOAT):
171 return PixelFormat::BC6H_SFLOAT;
172 case Hash(TextureFormat::BC6H_UFLOAT, FLOAT):
173 return PixelFormat::BC6H_UFLOAT;
174 case Hash(TextureFormat::ASTC_2D_4X4, UNORM, LINEAR):
175 return PixelFormat::ASTC_2D_4X4_UNORM;
176 case Hash(TextureFormat::ASTC_2D_4X4, UNORM, SRGB):
177 return PixelFormat::ASTC_2D_4X4_SRGB;
178 case Hash(TextureFormat::ASTC_2D_5X4, UNORM, LINEAR):
179 return PixelFormat::ASTC_2D_5X4_UNORM;
180 case Hash(TextureFormat::ASTC_2D_5X4, UNORM, SRGB):
181 return PixelFormat::ASTC_2D_5X4_SRGB;
182 case Hash(TextureFormat::ASTC_2D_5X5, UNORM, LINEAR):
183 return PixelFormat::ASTC_2D_5X5_UNORM;
184 case Hash(TextureFormat::ASTC_2D_5X5, UNORM, SRGB):
185 return PixelFormat::ASTC_2D_5X5_SRGB;
186 case Hash(TextureFormat::ASTC_2D_8X8, UNORM, LINEAR):
187 return PixelFormat::ASTC_2D_8X8_UNORM;
188 case Hash(TextureFormat::ASTC_2D_8X8, UNORM, SRGB):
189 return PixelFormat::ASTC_2D_8X8_SRGB;
190 case Hash(TextureFormat::ASTC_2D_8X5, UNORM, LINEAR):
191 return PixelFormat::ASTC_2D_8X5_UNORM;
192 case Hash(TextureFormat::ASTC_2D_8X5, UNORM, SRGB):
193 return PixelFormat::ASTC_2D_8X5_SRGB;
194 case Hash(TextureFormat::ASTC_2D_10X8, UNORM, LINEAR):
195 return PixelFormat::ASTC_2D_10X8_UNORM;
196 case Hash(TextureFormat::ASTC_2D_10X8, UNORM, SRGB):
197 return PixelFormat::ASTC_2D_10X8_SRGB;
198 case Hash(TextureFormat::ASTC_2D_6X6, UNORM, LINEAR):
199 return PixelFormat::ASTC_2D_6X6_UNORM;
200 case Hash(TextureFormat::ASTC_2D_6X6, UNORM, SRGB):
201 return PixelFormat::ASTC_2D_6X6_SRGB;
202 case Hash(TextureFormat::ASTC_2D_10X10, UNORM, LINEAR):
203 return PixelFormat::ASTC_2D_10X10_UNORM;
204 case Hash(TextureFormat::ASTC_2D_10X10, UNORM, SRGB):
205 return PixelFormat::ASTC_2D_10X10_SRGB;
206 case Hash(TextureFormat::ASTC_2D_12X12, UNORM, LINEAR):
207 return PixelFormat::ASTC_2D_12X12_UNORM;
208 case Hash(TextureFormat::ASTC_2D_12X12, UNORM, SRGB):
209 return PixelFormat::ASTC_2D_12X12_SRGB;
210 case Hash(TextureFormat::ASTC_2D_8X6, UNORM, LINEAR):
211 return PixelFormat::ASTC_2D_8X6_UNORM;
212 case Hash(TextureFormat::ASTC_2D_8X6, UNORM, SRGB):
213 return PixelFormat::ASTC_2D_8X6_SRGB;
214 case Hash(TextureFormat::ASTC_2D_6X5, UNORM, LINEAR):
215 return PixelFormat::ASTC_2D_6X5_UNORM;
216 case Hash(TextureFormat::ASTC_2D_6X5, UNORM, SRGB):
217 return PixelFormat::ASTC_2D_6X5_SRGB;
218 }
219 UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}",
220 static_cast<int>(format), is_srgb, static_cast<int>(red),
221 static_cast<int>(green), static_cast<int>(blue), static_cast<int>(alpha));
222 return PixelFormat::A8B8G8R8_UNORM;
223 }
224
225 } // namespace VideoCommon

The removed table-based implementation this replaces (format_lookup_table.cpp, old lines 170-217):

170 for (const auto& entry : DefinitionTable) {
171 table[CalculateIndex(entry.texture_format, entry.is_srgb != 0, entry.red_component,
172 entry.green_component, entry.blue_component, entry.alpha_component)] =
173 static_cast<u8>(entry.pixel_format);
174 }
175}
176
177PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb,
178 ComponentType red_component,
179 ComponentType green_component,
180 ComponentType blue_component,
181 ComponentType alpha_component) const noexcept {
182 const auto pixel_format = static_cast<PixelFormat>(table[CalculateIndex(
183 format, is_srgb, red_component, green_component, blue_component, alpha_component)]);
184 // [[likely]]
185 if (pixel_format != PixelFormat::Invalid) {
186 return pixel_format;
187 }
188 UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}",
189 static_cast<int>(format), is_srgb, static_cast<int>(red_component),
190 static_cast<int>(green_component), static_cast<int>(blue_component),
191 static_cast<int>(alpha_component));
192 return PixelFormat::A8B8G8R8_UNORM;
193}
194
195void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component,
196 ComponentType green_component, ComponentType blue_component,
197 ComponentType alpha_component, PixelFormat pixel_format) {}
198
199std::size_t FormatLookupTable::CalculateIndex(TextureFormat format, bool is_srgb,
200 ComponentType red_component,
201 ComponentType green_component,
202 ComponentType blue_component,
203 ComponentType alpha_component) noexcept {
204 const auto format_index = static_cast<std::size_t>(format);
205 const auto red_index = static_cast<std::size_t>(red_component);
206 const auto green_index = static_cast<std::size_t>(green_component);
207 const auto blue_index = static_cast<std::size_t>(blue_component);
208 const auto alpha_index = static_cast<std::size_t>(alpha_component);
209 const std::size_t srgb_index = is_srgb ? 1 : 0;
210
211 return format_index * PerFormat +
212 srgb_index * PerComponent * PerComponent * PerComponent * PerComponent +
213 alpha_index * PerComponent * PerComponent * PerComponent +
214 blue_index * PerComponent * PerComponent + green_index * PerComponent + red_index;
215}
216
217} // namespace VideoCommon 225} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/format_lookup_table.h b/src/video_core/texture_cache/format_lookup_table.h
index aa77e0a5a..729533999 100644
--- a/src/video_core/texture_cache/format_lookup_table.h
+++ b/src/video_core/texture_cache/format_lookup_table.h
@@ -4,48 +4,14 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <limits>
9#include "video_core/surface.h" 7#include "video_core/surface.h"
10#include "video_core/textures/texture.h" 8#include "video_core/textures/texture.h"
11 9
12namespace VideoCommon { 10namespace VideoCommon {
13 11
12VideoCore::Surface::PixelFormat PixelFormatFromTextureInfo(
13 Tegra::Texture::TextureFormat format, Tegra::Texture::ComponentType red_component,
14 Tegra::Texture::ComponentType green_component, Tegra::Texture::ComponentType blue_component,
15 Tegra::Texture::ComponentType alpha_component, bool is_srgb) noexcept;

The removed class interface this replaces (old lines 14-49):

14class FormatLookupTable {
15public:
16 explicit FormatLookupTable();
17
18 VideoCore::Surface::PixelFormat GetPixelFormat(
19 Tegra::Texture::TextureFormat format, bool is_srgb,
20 Tegra::Texture::ComponentType red_component, Tegra::Texture::ComponentType green_component,
21 Tegra::Texture::ComponentType blue_component,
22 Tegra::Texture::ComponentType alpha_component) const noexcept;
23
24private:
25 static_assert(VideoCore::Surface::MaxPixelFormat <= std::numeric_limits<u8>::max());
26
27 static constexpr std::size_t NumTextureFormats = 128;
28
29 static constexpr std::size_t PerComponent = 8;
30 static constexpr std::size_t PerComponents2 = PerComponent * PerComponent;
31 static constexpr std::size_t PerComponents3 = PerComponents2 * PerComponent;
32 static constexpr std::size_t PerComponents4 = PerComponents3 * PerComponent;
33 static constexpr std::size_t PerFormat = PerComponents4 * 2;
34
35 static std::size_t CalculateIndex(Tegra::Texture::TextureFormat format, bool is_srgb,
36 Tegra::Texture::ComponentType red_component,
37 Tegra::Texture::ComponentType green_component,
38 Tegra::Texture::ComponentType blue_component,
39 Tegra::Texture::ComponentType alpha_component) noexcept;
40
41 void Set(Tegra::Texture::TextureFormat format, bool is_srgb,
42 Tegra::Texture::ComponentType red_component,
43 Tegra::Texture::ComponentType green_component,
44 Tegra::Texture::ComponentType blue_component,
45 Tegra::Texture::ComponentType alpha_component,
46 VideoCore::Surface::PixelFormat pixel_format);
47
48 std::array<u8, NumTextureFormats * PerFormat> table;
49};
50 16
51} // namespace VideoCommon 17} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp
new file mode 100644
index 000000000..d10ba4ccd
--- /dev/null
+++ b/src/video_core/texture_cache/formatter.cpp
@@ -0,0 +1,95 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <string>
7
8#include "video_core/texture_cache/formatter.h"
9#include "video_core/texture_cache/image_base.h"
10#include "video_core/texture_cache/image_info.h"
11#include "video_core/texture_cache/image_view_base.h"
12#include "video_core/texture_cache/render_targets.h"
13
14namespace VideoCommon {
15
16std::string Name(const ImageBase& image) {
17 const GPUVAddr gpu_addr = image.gpu_addr;
18 const ImageInfo& info = image.info;
19 const u32 width = info.size.width;
20 const u32 height = info.size.height;
21 const u32 depth = info.size.depth;
22 const u32 num_layers = image.info.resources.layers;
23 const u32 num_levels = image.info.resources.levels;
24 std::string resource;
25 if (num_layers > 1) {
26 resource += fmt::format(":L{}", num_layers);
27 }
28 if (num_levels > 1) {
29 resource += fmt::format(":M{}", num_levels);
30 }
31 switch (image.info.type) {
32 case ImageType::e1D:
33 return fmt::format("Image 1D 0x{:x} {}{}", gpu_addr, width, resource);
34 case ImageType::e2D:
35 return fmt::format("Image 2D 0x{:x} {}x{}{}", gpu_addr, width, height, resource);
36 case ImageType::e3D:
 37 return fmt::format("Image 3D 0x{:x} {}x{}x{}{}", gpu_addr, width, height, depth, resource);
38 case ImageType::Linear:
39 return fmt::format("Image Linear 0x{:x} {}x{}", gpu_addr, width, height);
40 case ImageType::Buffer:
41 return fmt::format("Buffer 0x{:x} {}", image.gpu_addr, image.info.size.width);
42 }
43 return "Invalid";
44}
45
46std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) {
47 const u32 width = image_view.size.width;
48 const u32 height = image_view.size.height;
49 const u32 depth = image_view.size.depth;
50 const u32 num_levels = image_view.range.extent.levels;
51 const u32 num_layers = image_view.range.extent.layers;
52
53 const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : "";
54 switch (type.value_or(image_view.type)) {
55 case ImageViewType::e1D:
56 return fmt::format("ImageView 1D {}{}", width, level);
57 case ImageViewType::e2D:
58 return fmt::format("ImageView 2D {}x{}{}", width, height, level);
59 case ImageViewType::Cube:
60 return fmt::format("ImageView Cube {}x{}{}", width, height, level);
61 case ImageViewType::e3D:
62 return fmt::format("ImageView 3D {}x{}x{}{}", width, height, depth, level);
63 case ImageViewType::e1DArray:
64 return fmt::format("ImageView 1DArray {}{}|{}", width, level, num_layers);
65 case ImageViewType::e2DArray:
66 return fmt::format("ImageView 2DArray {}x{}{}|{}", width, height, level, num_layers);
67 case ImageViewType::CubeArray:
68 return fmt::format("ImageView CubeArray {}x{}{}|{}", width, height, level, num_layers);
69 case ImageViewType::Rect:
70 return fmt::format("ImageView Rect {}x{}{}", width, height, level);
71 case ImageViewType::Buffer:
72 return fmt::format("BufferView {}", width);
73 }
74 return "Invalid";
75}
76
77std::string Name(const RenderTargets& render_targets) {
78 std::string_view debug_prefix;
79 const auto num_color = std::ranges::count_if(
80 render_targets.color_buffer_ids, [](ImageViewId id) { return static_cast<bool>(id); });
81 if (render_targets.depth_buffer_id) {
82 debug_prefix = num_color > 0 ? "R" : "Z";
83 } else {
84 debug_prefix = num_color > 0 ? "C" : "X";
85 }
86 const Extent2D size = render_targets.size;
87 if (num_color > 0) {
88 return fmt::format("Framebuffer {}{} {}x{}", debug_prefix, num_color, size.width,
89 size.height);
90 } else {
91 return fmt::format("Framebuffer {} {}x{}", debug_prefix, size.width, size.height);
92 }
93}
94
95} // namespace VideoCommon
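These strings are debug labels only; a hypothetical call site and the shapes it produces under the formats above:

// Hypothetical usage when naming cache objects for graphics debuggers:
const std::string debug_name = VideoCommon::Name(image);
// e.g. "Image 2D 0x8000000 512x256:M2" for a two-level 512x256 image,
// or "Image 3D 0x8000000 64x64x64" for a single-level 64x64x64 volume.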
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
new file mode 100644
index 000000000..a48413983
--- /dev/null
+++ b/src/video_core/texture_cache/formatter.h
@@ -0,0 +1,263 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8
9#include <fmt/format.h>
10
11#include "video_core/surface.h"
12#include "video_core/texture_cache/types.h"
13
14template <>
15struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::string_view> {
16 template <typename FormatContext>
17 auto format(VideoCore::Surface::PixelFormat format, FormatContext& ctx) {
18 using VideoCore::Surface::PixelFormat;
19 const string_view name = [format] {
20 switch (format) {
21 case PixelFormat::A8B8G8R8_UNORM:
22 return "A8B8G8R8_UNORM";
23 case PixelFormat::A8B8G8R8_SNORM:
24 return "A8B8G8R8_SNORM";
25 case PixelFormat::A8B8G8R8_SINT:
26 return "A8B8G8R8_SINT";
27 case PixelFormat::A8B8G8R8_UINT:
28 return "A8B8G8R8_UINT";
29 case PixelFormat::R5G6B5_UNORM:
30 return "R5G6B5_UNORM";
31 case PixelFormat::B5G6R5_UNORM:
32 return "B5G6R5_UNORM";
33 case PixelFormat::A1R5G5B5_UNORM:
34 return "A1R5G5B5_UNORM";
35 case PixelFormat::A2B10G10R10_UNORM:
36 return "A2B10G10R10_UNORM";
37 case PixelFormat::A2B10G10R10_UINT:
38 return "A2B10G10R10_UINT";
39 case PixelFormat::A1B5G5R5_UNORM:
40 return "A1B5G5R5_UNORM";
41 case PixelFormat::R8_UNORM:
42 return "R8_UNORM";
43 case PixelFormat::R8_SNORM:
44 return "R8_SNORM";
45 case PixelFormat::R8_SINT:
46 return "R8_SINT";
47 case PixelFormat::R8_UINT:
48 return "R8_UINT";
49 case PixelFormat::R16G16B16A16_FLOAT:
50 return "R16G16B16A16_FLOAT";
51 case PixelFormat::R16G16B16A16_UNORM:
52 return "R16G16B16A16_UNORM";
53 case PixelFormat::R16G16B16A16_SNORM:
54 return "R16G16B16A16_SNORM";
55 case PixelFormat::R16G16B16A16_SINT:
56 return "R16G16B16A16_SINT";
57 case PixelFormat::R16G16B16A16_UINT:
58 return "R16G16B16A16_UINT";
59 case PixelFormat::B10G11R11_FLOAT:
60 return "B10G11R11_FLOAT";
61 case PixelFormat::R32G32B32A32_UINT:
62 return "R32G32B32A32_UINT";
63 case PixelFormat::BC1_RGBA_UNORM:
64 return "BC1_RGBA_UNORM";
65 case PixelFormat::BC2_UNORM:
66 return "BC2_UNORM";
67 case PixelFormat::BC3_UNORM:
68 return "BC3_UNORM";
69 case PixelFormat::BC4_UNORM:
70 return "BC4_UNORM";
71 case PixelFormat::BC4_SNORM:
72 return "BC4_SNORM";
73 case PixelFormat::BC5_UNORM:
74 return "BC5_UNORM";
75 case PixelFormat::BC5_SNORM:
76 return "BC5_SNORM";
77 case PixelFormat::BC7_UNORM:
78 return "BC7_UNORM";
79 case PixelFormat::BC6H_UFLOAT:
80 return "BC6H_UFLOAT";
81 case PixelFormat::BC6H_SFLOAT:
82 return "BC6H_SFLOAT";
83 case PixelFormat::ASTC_2D_4X4_UNORM:
84 return "ASTC_2D_4X4_UNORM";
85 case PixelFormat::B8G8R8A8_UNORM:
86 return "B8G8R8A8_UNORM";
87 case PixelFormat::R32G32B32A32_FLOAT:
88 return "R32G32B32A32_FLOAT";
89 case PixelFormat::R32G32B32A32_SINT:
90 return "R32G32B32A32_SINT";
91 case PixelFormat::R32G32_FLOAT:
92 return "R32G32_FLOAT";
93 case PixelFormat::R32G32_SINT:
94 return "R32G32_SINT";
95 case PixelFormat::R32_FLOAT:
96 return "R32_FLOAT";
97 case PixelFormat::R16_FLOAT:
98 return "R16_FLOAT";
99 case PixelFormat::R16_UNORM:
100 return "R16_UNORM";
101 case PixelFormat::R16_SNORM:
102 return "R16_SNORM";
103 case PixelFormat::R16_UINT:
104 return "R16_UINT";
105 case PixelFormat::R16_SINT:
106 return "R16_SINT";
107 case PixelFormat::R16G16_UNORM:
108 return "R16G16_UNORM";
109 case PixelFormat::R16G16_FLOAT:
110 return "R16G16_FLOAT";
111 case PixelFormat::R16G16_UINT:
112 return "R16G16_UINT";
113 case PixelFormat::R16G16_SINT:
114 return "R16G16_SINT";
115 case PixelFormat::R16G16_SNORM:
116 return "R16G16_SNORM";
117 case PixelFormat::R32G32B32_FLOAT:
118 return "R32G32B32_FLOAT";
119 case PixelFormat::A8B8G8R8_SRGB:
120 return "A8B8G8R8_SRGB";
121 case PixelFormat::R8G8_UNORM:
122 return "R8G8_UNORM";
123 case PixelFormat::R8G8_SNORM:
124 return "R8G8_SNORM";
125 case PixelFormat::R8G8_SINT:
126 return "R8G8_SINT";
127 case PixelFormat::R8G8_UINT:
128 return "R8G8_UINT";
129 case PixelFormat::R32G32_UINT:
130 return "R32G32_UINT";
131 case PixelFormat::R16G16B16X16_FLOAT:
132 return "R16G16B16X16_FLOAT";
133 case PixelFormat::R32_UINT:
134 return "R32_UINT";
135 case PixelFormat::R32_SINT:
136 return "R32_SINT";
137 case PixelFormat::ASTC_2D_8X8_UNORM:
138 return "ASTC_2D_8X8_UNORM";
139 case PixelFormat::ASTC_2D_8X5_UNORM:
140 return "ASTC_2D_8X5_UNORM";
141 case PixelFormat::ASTC_2D_5X4_UNORM:
142 return "ASTC_2D_5X4_UNORM";
143 case PixelFormat::B8G8R8A8_SRGB:
144 return "B8G8R8A8_SRGB";
145 case PixelFormat::BC1_RGBA_SRGB:
146 return "BC1_RGBA_SRGB";
147 case PixelFormat::BC2_SRGB:
148 return "BC2_SRGB";
149 case PixelFormat::BC3_SRGB:
150 return "BC3_SRGB";
151 case PixelFormat::BC7_SRGB:
152 return "BC7_SRGB";
153 case PixelFormat::A4B4G4R4_UNORM:
154 return "A4B4G4R4_UNORM";
155 case PixelFormat::ASTC_2D_4X4_SRGB:
156 return "ASTC_2D_4X4_SRGB";
157 case PixelFormat::ASTC_2D_8X8_SRGB:
158 return "ASTC_2D_8X8_SRGB";
159 case PixelFormat::ASTC_2D_8X5_SRGB:
160 return "ASTC_2D_8X5_SRGB";
161 case PixelFormat::ASTC_2D_5X4_SRGB:
162 return "ASTC_2D_5X4_SRGB";
163 case PixelFormat::ASTC_2D_5X5_UNORM:
164 return "ASTC_2D_5X5_UNORM";
165 case PixelFormat::ASTC_2D_5X5_SRGB:
166 return "ASTC_2D_5X5_SRGB";
167 case PixelFormat::ASTC_2D_10X8_UNORM:
168 return "ASTC_2D_10X8_UNORM";
169 case PixelFormat::ASTC_2D_10X8_SRGB:
170 return "ASTC_2D_10X8_SRGB";
171 case PixelFormat::ASTC_2D_6X6_UNORM:
172 return "ASTC_2D_6X6_UNORM";
173 case PixelFormat::ASTC_2D_6X6_SRGB:
174 return "ASTC_2D_6X6_SRGB";
175 case PixelFormat::ASTC_2D_10X10_UNORM:
176 return "ASTC_2D_10X10_UNORM";
177 case PixelFormat::ASTC_2D_10X10_SRGB:
178 return "ASTC_2D_10X10_SRGB";
179 case PixelFormat::ASTC_2D_12X12_UNORM:
180 return "ASTC_2D_12X12_UNORM";
181 case PixelFormat::ASTC_2D_12X12_SRGB:
182 return "ASTC_2D_12X12_SRGB";
183 case PixelFormat::ASTC_2D_8X6_UNORM:
184 return "ASTC_2D_8X6_UNORM";
185 case PixelFormat::ASTC_2D_8X6_SRGB:
186 return "ASTC_2D_8X6_SRGB";
187 case PixelFormat::ASTC_2D_6X5_UNORM:
188 return "ASTC_2D_6X5_UNORM";
189 case PixelFormat::ASTC_2D_6X5_SRGB:
190 return "ASTC_2D_6X5_SRGB";
191 case PixelFormat::E5B9G9R9_FLOAT:
192 return "E5B9G9R9_FLOAT";
193 case PixelFormat::D32_FLOAT:
194 return "D32_FLOAT";
195 case PixelFormat::D16_UNORM:
196 return "D16_UNORM";
197 case PixelFormat::D24_UNORM_S8_UINT:
198 return "D24_UNORM_S8_UINT";
199 case PixelFormat::S8_UINT_D24_UNORM:
200 return "S8_UINT_D24_UNORM";
201 case PixelFormat::D32_FLOAT_S8_UINT:
202 return "D32_FLOAT_S8_UINT";
203 case PixelFormat::MaxDepthStencilFormat:
204 case PixelFormat::Invalid:
205 return "Invalid";
206 }
207 return "Invalid";
208 }();
209 return formatter<string_view>::format(name, ctx);
210 }
211};
212
213template <>
214struct fmt::formatter<VideoCommon::ImageType> : fmt::formatter<fmt::string_view> {
215 template <typename FormatContext>
216 auto format(VideoCommon::ImageType type, FormatContext& ctx) {
217 const string_view name = [type] {
218 using VideoCommon::ImageType;
219 switch (type) {
220 case ImageType::e1D:
221 return "1D";
222 case ImageType::e2D:
223 return "2D";
224 case ImageType::e3D:
225 return "3D";
226 case ImageType::Linear:
227 return "Linear";
228 case ImageType::Buffer:
229 return "Buffer";
230 }
231 return "Invalid";
232 }();
233 return formatter<string_view>::format(name, ctx);
234 }
235};
236
237template <>
238struct fmt::formatter<VideoCommon::Extent3D> {
239 constexpr auto parse(fmt::format_parse_context& ctx) {
240 return ctx.begin();
241 }
242
243 template <typename FormatContext>
244 auto format(const VideoCommon::Extent3D& extent, FormatContext& ctx) {
245 return fmt::format_to(ctx.out(), "{{{}, {}, {}}}", extent.width, extent.height,
246 extent.depth);
247 }
248};
249
250namespace VideoCommon {
251
252struct ImageBase;
253struct ImageViewBase;
254struct RenderTargets;
255
256[[nodiscard]] std::string Name(const ImageBase& image);
257
258[[nodiscard]] std::string Name(const ImageViewBase& image_view,
259 std::optional<ImageViewType> type = std::nullopt);
260
261[[nodiscard]] std::string Name(const RenderTargets& render_targets);
262
263} // namespace VideoCommon
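With these specializations in scope (this header included), the enums can be passed straight to fmt, and thus to the LOG_* macros, without manual casts; a small usage sketch:

const std::string message =
    fmt::format("creating {} image with format {}", VideoCommon::ImageType::e2D,
                VideoCore::Surface::PixelFormat::B5G6R5_UNORM);
// message == "creating 2D image with format B5G6R5_UNORM"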
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
new file mode 100644
index 000000000..448a05fcc
--- /dev/null
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -0,0 +1,216 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <optional>
7#include <utility>
8#include <vector>
9
10#include "common/common_types.h"
11#include "video_core/surface.h"
12#include "video_core/texture_cache/formatter.h"
13#include "video_core/texture_cache/image_base.h"
14#include "video_core/texture_cache/image_view_info.h"
15#include "video_core/texture_cache/util.h"
16
17namespace VideoCommon {
18
19using VideoCore::Surface::DefaultBlockHeight;
20using VideoCore::Surface::DefaultBlockWidth;
21
22namespace {
23/// Returns the base layer and mip level offset
24[[nodiscard]] std::pair<s32, s32> LayerMipOffset(s32 diff, u32 layer_stride) {
25 if (layer_stride == 0) {
26 return {0, diff};
27 } else {
28 return {diff / layer_stride, diff % layer_stride};
29 }
30}
31
32[[nodiscard]] bool ValidateLayers(const SubresourceLayers& layers, const ImageInfo& info) {
33 return layers.base_level < info.resources.levels &&
34 layers.base_layer + layers.num_layers <= info.resources.layers;
35}
36
37[[nodiscard]] bool ValidateCopy(const ImageCopy& copy, const ImageInfo& dst, const ImageInfo& src) {
38 const Extent3D src_size = MipSize(src.size, copy.src_subresource.base_level);
39 const Extent3D dst_size = MipSize(dst.size, copy.dst_subresource.base_level);
40 if (!ValidateLayers(copy.src_subresource, src)) {
41 return false;
42 }
43 if (!ValidateLayers(copy.dst_subresource, dst)) {
44 return false;
45 }
46 if (copy.src_offset.x + copy.extent.width > src_size.width ||
47 copy.src_offset.y + copy.extent.height > src_size.height ||
48 copy.src_offset.z + copy.extent.depth > src_size.depth) {
49 return false;
50 }
51 if (copy.dst_offset.x + copy.extent.width > dst_size.width ||
52 copy.dst_offset.y + copy.extent.height > dst_size.height ||
53 copy.dst_offset.z + copy.extent.depth > dst_size.depth) {
54 return false;
55 }
56 return true;
57}
58} // Anonymous namespace
59
60ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_)
61 : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)},
62 unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)},
63 converted_size_bytes{CalculateConvertedSizeBytes(info)}, gpu_addr{gpu_addr_},
64 cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes},
65 mip_level_offsets{CalculateMipLevelOffsets(info)} {
66 if (info.type == ImageType::e3D) {
67 slice_offsets = CalculateSliceOffsets(info);
68 slice_subresources = CalculateSliceSubresources(info);
69 }
70}
71
72std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept {
73 if (other_addr < gpu_addr) {
74 // Subresource address can't be lower than the base
75 return std::nullopt;
76 }
77 const u32 diff = static_cast<u32>(other_addr - gpu_addr);
78 if (diff > guest_size_bytes) {
79 // This can happen when two CPU addresses are used for different GPU addresses
80 return std::nullopt;
81 }
82 if (info.type != ImageType::e3D) {
83 const auto [layer, mip_offset] = LayerMipOffset(diff, info.layer_stride);
84 const auto end = mip_level_offsets.begin() + info.resources.levels;
85 const auto it = std::find(mip_level_offsets.begin(), end, mip_offset);
86 if (layer > info.resources.layers || it == end) {
87 return std::nullopt;
88 }
89 return SubresourceBase{
90 .level = static_cast<s32>(std::distance(mip_level_offsets.begin(), it)),
91 .layer = layer,
92 };
93 } else {
94 // TODO: Consider using binary_search after a threshold
95 const auto it = std::ranges::find(slice_offsets, diff);
96 if (it == slice_offsets.cend()) {
97 return std::nullopt;
98 }
99 return slice_subresources[std::distance(slice_offsets.begin(), it)];
100 }
101}
102
103ImageViewId ImageBase::FindView(const ImageViewInfo& view_info) const noexcept {
104 const auto it = std::ranges::find(image_view_infos, view_info);
105 if (it == image_view_infos.end()) {
106 return ImageViewId{};
107 }
108 return image_view_ids[std::distance(image_view_infos.begin(), it)];
109}
110
111void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id) {
112 image_view_infos.push_back(view_info);
113 image_view_ids.push_back(image_view_id);
114}
115
116void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
117 static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
118 ASSERT(lhs.info.type == rhs.info.type);
119 std::optional<SubresourceBase> base;
120 if (lhs.info.type == ImageType::Linear) {
121 base = SubresourceBase{.level = 0, .layer = 0};
122 } else {
123 base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS);
124 }
125 if (!base) {
126 LOG_ERROR(HW_GPU, "Image alias should have been flipped");
127 return;
128 }
129 const PixelFormat lhs_format = lhs.info.format;
130 const PixelFormat rhs_format = rhs.info.format;
131 const Extent2D lhs_block{
132 .width = DefaultBlockWidth(lhs_format),
133 .height = DefaultBlockHeight(lhs_format),
134 };
135 const Extent2D rhs_block{
136 .width = DefaultBlockWidth(rhs_format),
137 .height = DefaultBlockHeight(rhs_format),
138 };
139 const bool is_lhs_compressed = lhs_block.width > 1 || lhs_block.height > 1;
140 const bool is_rhs_compressed = rhs_block.width > 1 || rhs_block.height > 1;
141 if (is_lhs_compressed && is_rhs_compressed) {
142 LOG_ERROR(HW_GPU, "Compressed to compressed image aliasing is not implemented");
143 return;
144 }
145 const s32 lhs_mips = lhs.info.resources.levels;
146 const s32 rhs_mips = rhs.info.resources.levels;
147 const s32 num_mips = std::min(lhs_mips - base->level, rhs_mips);
148 AliasedImage lhs_alias;
149 AliasedImage rhs_alias;
150 lhs_alias.id = rhs_id;
151 rhs_alias.id = lhs_id;
152 lhs_alias.copies.reserve(num_mips);
153 rhs_alias.copies.reserve(num_mips);
154 for (s32 mip_level = 0; mip_level < num_mips; ++mip_level) {
155 Extent3D lhs_size = MipSize(lhs.info.size, base->level + mip_level);
156 Extent3D rhs_size = MipSize(rhs.info.size, mip_level);
157 if (is_lhs_compressed) {
158 lhs_size.width /= lhs_block.width;
159 lhs_size.height /= lhs_block.height;
160 }
161 if (is_rhs_compressed) {
162 rhs_size.width /= rhs_block.width;
163 rhs_size.height /= rhs_block.height;
164 }
165 const Extent3D copy_size{
166 .width = std::min(lhs_size.width, rhs_size.width),
167 .height = std::min(lhs_size.height, rhs_size.height),
168 .depth = std::min(lhs_size.depth, rhs_size.depth),
169 };
170 if (copy_size.width == 0 || copy_size.height == 0) {
171 LOG_WARNING(HW_GPU, "Copy size is smaller than block size. Mip cannot be aliased.");
172 continue;
173 }
174 const bool is_lhs_3d = lhs.info.type == ImageType::e3D;
175 const bool is_rhs_3d = rhs.info.type == ImageType::e3D;
176 const Offset3D lhs_offset{0, 0, 0};
177 const Offset3D rhs_offset{0, 0, is_rhs_3d ? base->layer : 0};
178 const s32 lhs_layers = is_lhs_3d ? 1 : lhs.info.resources.layers - base->layer;
179 const s32 rhs_layers = is_rhs_3d ? 1 : rhs.info.resources.layers;
180 const s32 num_layers = std::min(lhs_layers, rhs_layers);
181 const SubresourceLayers lhs_subresource{
182 .base_level = mip_level,
183 .base_layer = 0,
184 .num_layers = num_layers,
185 };
186 const SubresourceLayers rhs_subresource{
187 .base_level = base->level + mip_level,
188 .base_layer = is_rhs_3d ? 0 : base->layer,
189 .num_layers = num_layers,
190 };
191 [[maybe_unused]] const ImageCopy& to_lhs_copy = lhs_alias.copies.emplace_back(ImageCopy{
192 .src_subresource = lhs_subresource,
193 .dst_subresource = rhs_subresource,
194 .src_offset = lhs_offset,
195 .dst_offset = rhs_offset,
196 .extent = copy_size,
197 });
198 [[maybe_unused]] const ImageCopy& to_rhs_copy = rhs_alias.copies.emplace_back(ImageCopy{
199 .src_subresource = rhs_subresource,
200 .dst_subresource = lhs_subresource,
201 .src_offset = rhs_offset,
202 .dst_offset = lhs_offset,
203 .extent = copy_size,
204 });
205 ASSERT_MSG(ValidateCopy(to_lhs_copy, lhs.info, rhs.info), "Invalid RHS to LHS copy");
206 ASSERT_MSG(ValidateCopy(to_rhs_copy, rhs.info, lhs.info), "Invalid LHS to RHS copy");
207 }
208 ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty());
209 if (lhs_alias.copies.empty()) {
210 return;
211 }
212 lhs.aliased_images.push_back(std::move(lhs_alias));
213 rhs.aliased_images.push_back(std::move(rhs_alias));
214}
215
216} // namespace VideoCommon
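TryFindBase recovers a subresource from address arithmetic alone; a worked example with assumed values:

// Assumed: layer_stride = 0x20000, mip_level_offsets = {0x0, 0x18000, 0x1e000},
// and an image with at least three layers. For other_addr = gpu_addr + 0x58000:
//   diff       = 0x58000
//   layer      = 0x58000 / 0x20000 = 2
//   mip_offset = 0x58000 % 0x20000 = 0x18000  -> found at offset index 1
// so TryFindBase returns SubresourceBase{.level = 1, .layer = 2}.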
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
new file mode 100644
index 000000000..b7f3b7e43
--- /dev/null
+++ b/src/video_core/texture_cache/image_base.h
@@ -0,0 +1,83 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <optional>
9#include <vector>
10
11#include "common/common_funcs.h"
12#include "common/common_types.h"
13#include "video_core/texture_cache/image_info.h"
14#include "video_core/texture_cache/image_view_info.h"
15#include "video_core/texture_cache/types.h"
16
17namespace VideoCommon {
18
19enum class ImageFlagBits : u32 {
20 AcceleratedUpload = 1 << 0, ///< Upload can be accelerated in the GPU
21 Converted = 1 << 1, ///< Guest format is not supported natively and it has to be converted
22 CpuModified = 1 << 2, ///< Contents have been modified from the CPU
23 GpuModified = 1 << 3, ///< Contents have been modified from the GPU
24 Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU JIT
 25 Strong = 1 << 5, ///< Exists in the image table; the dimensions can be trusted
26 Registered = 1 << 6, ///< True when the image is registered
27 Picked = 1 << 7, ///< Temporary flag to mark the image as picked
28};
29DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
30
31struct ImageViewInfo;
32
33struct AliasedImage {
34 std::vector<ImageCopy> copies;
35 ImageId id;
36};
37
38struct ImageBase {
39 explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
40
41 [[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept;
42
43 [[nodiscard]] ImageViewId FindView(const ImageViewInfo& view_info) const noexcept;
44
45 void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id);
46
47 [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
48 const VAddr overlap_end = overlap_cpu_addr + overlap_size;
49 return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
50 }
51
52 ImageInfo info;
53
54 u32 guest_size_bytes = 0;
55 u32 unswizzled_size_bytes = 0;
56 u32 converted_size_bytes = 0;
57 ImageFlagBits flags = ImageFlagBits::CpuModified;
58
59 GPUVAddr gpu_addr = 0;
60 VAddr cpu_addr = 0;
61 VAddr cpu_addr_end = 0;
62
63 u64 modification_tick = 0;
64 u64 frame_tick = 0;
65
66 std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{};
67
68 std::vector<ImageViewInfo> image_view_infos;
69 std::vector<ImageViewId> image_view_ids;
70
71 std::vector<u32> slice_offsets;
72 std::vector<SubresourceBase> slice_subresources;
73
74 std::vector<AliasedImage> aliased_images;
75};
76
77struct ImageAllocBase {
78 std::vector<ImageId> images;
79};
80
81void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id);
82
83} // namespace VideoCommon
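DECLARE_ENUM_FLAG_OPERATORS gives ImageFlagBits bitwise semantics; a short sketch of how the cache is expected to drive these flags, assuming the macro generates the usual operator set:

ImageFlagBits flags = ImageFlagBits::CpuModified; // images start CPU-dirty
flags |= ImageFlagBits::Registered;               // set a flag
flags &= ~ImageFlagBits::CpuModified;             // clear a flag after upload
const bool gpu_dirty = (flags & ImageFlagBits::GpuModified) != ImageFlagBits{};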
diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp
new file mode 100644
index 000000000..64fd7010a
--- /dev/null
+++ b/src/video_core/texture_cache/image_info.cpp
@@ -0,0 +1,189 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "video_core/surface.h"
7#include "video_core/texture_cache/format_lookup_table.h"
8#include "video_core/texture_cache/image_info.h"
9#include "video_core/texture_cache/samples_helper.h"
10#include "video_core/texture_cache/types.h"
11#include "video_core/texture_cache/util.h"
12#include "video_core/textures/texture.h"
13
14namespace VideoCommon {
15
16using Tegra::Texture::TextureType;
17using Tegra::Texture::TICEntry;
18using VideoCore::Surface::PixelFormat;
19
20ImageInfo::ImageInfo(const TICEntry& config) noexcept {
21 format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type,
22 config.a_type, config.srgb_conversion);
23 num_samples = NumSamples(config.msaa_mode);
24 resources.levels = config.max_mip_level + 1;
25 if (config.IsPitchLinear()) {
26 pitch = config.Pitch();
27 } else if (config.IsBlockLinear()) {
28 block = Extent3D{
29 .width = config.block_width,
30 .height = config.block_height,
31 .depth = config.block_depth,
32 };
33 }
34 tile_width_spacing = config.tile_width_spacing;
35 if (config.texture_type != TextureType::Texture2D &&
36 config.texture_type != TextureType::Texture2DNoMipmap) {
37 ASSERT(!config.IsPitchLinear());
38 }
39 switch (config.texture_type) {
40 case TextureType::Texture1D:
41 ASSERT(config.BaseLayer() == 0);
42 type = ImageType::e1D;
43 size.width = config.Width();
44 break;
45 case TextureType::Texture1DArray:
46 UNIMPLEMENTED_IF(config.BaseLayer() != 0);
47 type = ImageType::e1D;
48 size.width = config.Width();
49 resources.layers = config.Depth();
50 break;
51 case TextureType::Texture2D:
52 case TextureType::Texture2DNoMipmap:
53 ASSERT(config.Depth() == 1);
54 type = config.IsPitchLinear() ? ImageType::Linear : ImageType::e2D;
55 size.width = config.Width();
56 size.height = config.Height();
57 resources.layers = config.BaseLayer() + 1;
58 break;
59 case TextureType::Texture2DArray:
60 type = ImageType::e2D;
61 size.width = config.Width();
62 size.height = config.Height();
63 resources.layers = config.BaseLayer() + config.Depth();
64 break;
65 case TextureType::TextureCubemap:
66 ASSERT(config.Depth() == 1);
67 type = ImageType::e2D;
68 size.width = config.Width();
69 size.height = config.Height();
70 resources.layers = config.BaseLayer() + 6;
71 break;
72 case TextureType::TextureCubeArray:
73 UNIMPLEMENTED_IF(config.load_store_hint != 0);
74 type = ImageType::e2D;
75 size.width = config.Width();
76 size.height = config.Height();
77 resources.layers = config.BaseLayer() + config.Depth() * 6;
78 break;
79 case TextureType::Texture3D:
80 ASSERT(config.BaseLayer() == 0);
81 type = ImageType::e3D;
82 size.width = config.Width();
83 size.height = config.Height();
84 size.depth = config.Depth();
85 break;
86 case TextureType::Texture1DBuffer:
87 type = ImageType::Buffer;
88 size.width = config.Width();
89 break;
90 default:
91 UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
92 break;
93 }
94 if (type != ImageType::Linear) {
95 // FIXME: Call this without passing *this
96 layer_stride = CalculateLayerStride(*this);
97 maybe_unaligned_layer_stride = CalculateLayerSize(*this);
98 }
99}
100
101ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept {
102 const auto& rt = regs.rt[index];
103 format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format);
104 if (rt.tile_mode.is_pitch_linear) {
105 ASSERT(rt.tile_mode.is_3d == 0);
106 type = ImageType::Linear;
107 pitch = rt.width;
108 size = Extent3D{
109 .width = pitch / BytesPerBlock(format),
110 .height = rt.height,
111 .depth = 1,
112 };
113 return;
114 }
115 size.width = rt.width;
116 size.height = rt.height;
117 layer_stride = rt.layer_stride * 4;
118 maybe_unaligned_layer_stride = layer_stride;
119 num_samples = NumSamples(regs.multisample_mode);
120 block = Extent3D{
121 .width = rt.tile_mode.block_width,
122 .height = rt.tile_mode.block_height,
123 .depth = rt.tile_mode.block_depth,
124 };
125 if (rt.tile_mode.is_3d) {
126 type = ImageType::e3D;
127 size.depth = rt.depth;
128 } else {
129 type = ImageType::e2D;
130 resources.layers = rt.depth;
131 }
132}
133
134ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept {
135 format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format);
136 size.width = regs.zeta_width;
137 size.height = regs.zeta_height;
138 resources.levels = 1;
139 layer_stride = regs.zeta.layer_stride * 4;
140 maybe_unaligned_layer_stride = layer_stride;
141 num_samples = NumSamples(regs.multisample_mode);
142 block = Extent3D{
143 .width = regs.zeta.tile_mode.block_width,
144 .height = regs.zeta.tile_mode.block_height,
145 .depth = regs.zeta.tile_mode.block_depth,
146 };
147 if (regs.zeta.tile_mode.is_pitch_linear) {
148 ASSERT(regs.zeta.tile_mode.is_3d == 0);
149 type = ImageType::Linear;
150 pitch = size.width * BytesPerBlock(format);
151 } else if (regs.zeta.tile_mode.is_3d) {
152 ASSERT(regs.zeta.tile_mode.is_pitch_linear == 0);
153 type = ImageType::e3D;
154 size.depth = regs.zeta_depth;
155 } else {
156 type = ImageType::e2D;
157 resources.layers = regs.zeta_depth;
158 }
159}
160
161ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept {
162 UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero");
163 format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format);
164 if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) {
165 type = ImageType::Linear;
166 size = Extent3D{
167 .width = config.pitch / VideoCore::Surface::BytesPerBlock(format),
168 .height = config.height,
169 .depth = 1,
170 };
171 pitch = config.pitch;
172 } else {
173 type = config.block_depth > 0 ? ImageType::e3D : ImageType::e2D;
174 block = Extent3D{
175 .width = config.block_width,
176 .height = config.block_height,
177 .depth = config.block_depth,
178 };
 179 // 3D blits with more than one slice are not implemented for now
180 // Render to individual slices
181 size = Extent3D{
182 .width = config.width,
183 .height = config.height,
184 .depth = 1,
185 };
186 }
187}
188
189} // namespace VideoCommon
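For pitch-linear surfaces, the constructors above derive the width in texels by dividing the byte pitch by the block size; a worked example with assumed numbers:

// Assumed: an A8B8G8R8_UNORM pitch-linear target, where BytesPerBlock returns 4.
constexpr u32 pitch_bytes = 5120;
constexpr u32 bytes_per_block = 4; // BytesPerBlock(PixelFormat::A8B8G8R8_UNORM)
static_assert(pitch_bytes / bytes_per_block == 1280); // width in texels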
diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h
new file mode 100644
index 000000000..5049fc36e
--- /dev/null
+++ b/src/video_core/texture_cache/image_info.h
@@ -0,0 +1,38 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/engines/fermi_2d.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/surface.h"
10#include "video_core/texture_cache/types.h"
11
12namespace VideoCommon {
13
14using Tegra::Texture::TICEntry;
15using VideoCore::Surface::PixelFormat;
16
17struct ImageInfo {
18 explicit ImageInfo() = default;
19 explicit ImageInfo(const TICEntry& config) noexcept;
20 explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept;
21 explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept;
22 explicit ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept;
23
24 PixelFormat format = PixelFormat::Invalid;
25 ImageType type = ImageType::e1D;
26 SubresourceExtent resources;
27 Extent3D size{1, 1, 1};
28 union {
29 Extent3D block{0, 0, 0};
30 u32 pitch;
31 };
32 u32 layer_stride = 0;
33 u32 maybe_unaligned_layer_stride = 0;
34 u32 num_samples = 1;
35 u32 tile_width_spacing = 0;
36};
37
38} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
new file mode 100644
index 000000000..076a4bcfd
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -0,0 +1,41 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6
7#include "common/assert.h"
8#include "core/settings.h"
9#include "video_core/compatible_formats.h"
10#include "video_core/surface.h"
11#include "video_core/texture_cache/formatter.h"
12#include "video_core/texture_cache/image_info.h"
13#include "video_core/texture_cache/image_view_base.h"
14#include "video_core/texture_cache/image_view_info.h"
15#include "video_core/texture_cache/types.h"
16
17namespace VideoCommon {
18
19ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
20 ImageId image_id_)
21 : image_id{image_id_}, format{info.format}, type{info.type}, range{info.range},
22 size{
23 .width = std::max(image_info.size.width >> range.base.level, 1u),
24 .height = std::max(image_info.size.height >> range.base.level, 1u),
25 .depth = std::max(image_info.size.depth >> range.base.level, 1u),
26 } {
27 ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format),
28 "Image view format {} is incompatible with image format {}", info.format,
29 image_info.format);
30 const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
31 if (image_info.type == ImageType::Linear && is_async) {
32 flags |= ImageViewFlagBits::PreemtiveDownload;
33 }
34 if (image_info.type == ImageType::e3D && info.type != ImageViewType::e3D) {
35 flags |= ImageViewFlagBits::Slice;
36 }
37}
38
39ImageViewBase::ImageViewBase(const NullImageParams&) {}
40
41} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h
new file mode 100644
index 000000000..73954167e
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_base.h
@@ -0,0 +1,47 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_funcs.h"
8#include "video_core/surface.h"
9#include "video_core/texture_cache/types.h"
10
11namespace VideoCommon {
12
13using VideoCore::Surface::PixelFormat;
14
15struct ImageViewInfo;
16struct ImageInfo;
17
18struct NullImageParams {};
19
20enum class ImageViewFlagBits : u16 {
21 PreemtiveDownload = 1 << 0,
22 Strong = 1 << 1,
23 Slice = 1 << 2,
24};
25DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits)
26
27struct ImageViewBase {
28 explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
29 ImageId image_id);
30 explicit ImageViewBase(const NullImageParams&);
31
32 [[nodiscard]] bool IsBuffer() const noexcept {
33 return type == ImageViewType::Buffer;
34 }
35
36 ImageId image_id{};
37 PixelFormat format{};
38 ImageViewType type{};
39 SubresourceRange range;
40 Extent3D size{0, 0, 0};
41 ImageViewFlagBits flags{};
42
43 u64 invalidation_tick = 0;
44 u64 modification_tick = 0;
45};
46
47} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp
new file mode 100644
index 000000000..faf5b151f
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_info.cpp
@@ -0,0 +1,88 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <limits>
6
7#include "common/assert.h"
8#include "video_core/texture_cache/image_view_info.h"
9#include "video_core/texture_cache/texture_cache.h"
10#include "video_core/texture_cache/types.h"
11#include "video_core/textures/texture.h"
12
13namespace VideoCommon {
14
15namespace {
16
17constexpr u8 RENDER_TARGET_SWIZZLE = std::numeric_limits<u8>::max();
18
19[[nodiscard]] u8 CastSwizzle(SwizzleSource source) {
20 const u8 casted = static_cast<u8>(source);
21 ASSERT(static_cast<SwizzleSource>(casted) == source);
22 return casted;
23}
24
25} // Anonymous namespace
26
27ImageViewInfo::ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept
28 : format{PixelFormatFromTIC(config)}, x_source{CastSwizzle(config.x_source)},
29 y_source{CastSwizzle(config.y_source)}, z_source{CastSwizzle(config.z_source)},
30 w_source{CastSwizzle(config.w_source)} {
31 range.base = SubresourceBase{
32 .level = static_cast<s32>(config.res_min_mip_level),
33 .layer = base_layer,
34 };
35 range.extent.levels = config.res_max_mip_level - config.res_min_mip_level + 1;
36
37 switch (config.texture_type) {
38 case TextureType::Texture1D:
39 ASSERT(config.Height() == 1);
40 ASSERT(config.Depth() == 1);
41 type = ImageViewType::e1D;
42 break;
43 case TextureType::Texture2D:
44 case TextureType::Texture2DNoMipmap:
45 ASSERT(config.Depth() == 1);
46 type = config.normalized_coords ? ImageViewType::e2D : ImageViewType::Rect;
47 break;
48 case TextureType::Texture3D:
49 type = ImageViewType::e3D;
50 break;
51 case TextureType::TextureCubemap:
52 ASSERT(config.Depth() == 1);
53 type = ImageViewType::Cube;
54 range.extent.layers = 6;
55 break;
56 case TextureType::Texture1DArray:
57 type = ImageViewType::e1DArray;
58 range.extent.layers = config.Depth();
59 break;
60 case TextureType::Texture2DArray:
61 type = ImageViewType::e2DArray;
62 range.extent.layers = config.Depth();
63 break;
64 case TextureType::Texture1DBuffer:
65 type = ImageViewType::Buffer;
66 break;
67 case TextureType::TextureCubeArray:
68 type = ImageViewType::CubeArray;
69 range.extent.layers = config.Depth() * 6;
70 break;
71 default:
72 UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
73 break;
74 }
75}
76
77ImageViewInfo::ImageViewInfo(ImageViewType type_, PixelFormat format_,
78 SubresourceRange range_) noexcept
79 : type{type_}, format{format_}, range{range_}, x_source{RENDER_TARGET_SWIZZLE},
80 y_source{RENDER_TARGET_SWIZZLE}, z_source{RENDER_TARGET_SWIZZLE},
81 w_source{RENDER_TARGET_SWIZZLE} {}
82
83bool ImageViewInfo::IsRenderTarget() const noexcept {
84 return x_source == RENDER_TARGET_SWIZZLE && y_source == RENDER_TARGET_SWIZZLE &&
85 z_source == RENDER_TARGET_SWIZZLE && w_source == RENDER_TARGET_SWIZZLE;
86}
87
88} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_info.h b/src/video_core/texture_cache/image_view_info.h
new file mode 100644
index 000000000..0c1f99117
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_info.h
@@ -0,0 +1,50 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <type_traits>
9
10#include "video_core/surface.h"
11#include "video_core/texture_cache/types.h"
12#include "video_core/textures/texture.h"
13
14namespace VideoCommon {
15
16using Tegra::Texture::SwizzleSource;
17using Tegra::Texture::TICEntry;
18using VideoCore::Surface::PixelFormat;
19
20/// Properties used to determine an image view
21struct ImageViewInfo {
22 explicit ImageViewInfo() noexcept = default;
23 explicit ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept;
24 explicit ImageViewInfo(ImageViewType type, PixelFormat format,
25 SubresourceRange range = {}) noexcept;
26
27 auto operator<=>(const ImageViewInfo&) const noexcept = default;
28
29 [[nodiscard]] bool IsRenderTarget() const noexcept;
30
31 [[nodiscard]] std::array<SwizzleSource, 4> Swizzle() const noexcept {
32 return std::array{
33 static_cast<SwizzleSource>(x_source),
34 static_cast<SwizzleSource>(y_source),
35 static_cast<SwizzleSource>(z_source),
36 static_cast<SwizzleSource>(w_source),
37 };
38 }
39
40 ImageViewType type{};
41 PixelFormat format{};
42 SubresourceRange range;
43 u8 x_source = static_cast<u8>(SwizzleSource::R);
44 u8 y_source = static_cast<u8>(SwizzleSource::G);
45 u8 z_source = static_cast<u8>(SwizzleSource::B);
46 u8 w_source = static_cast<u8>(SwizzleSource::A);
47};
48static_assert(std::has_unique_object_representations_v<ImageViewInfo>);
49
50} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/render_targets.h b/src/video_core/texture_cache/render_targets.h
new file mode 100644
index 000000000..9b9544b07
--- /dev/null
+++ b/src/video_core/texture_cache/render_targets.h
@@ -0,0 +1,51 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <span>
9#include <utility>
10
11#include "common/bit_cast.h"
12#include "video_core/texture_cache/types.h"
13
14namespace VideoCommon {
15
16/// Framebuffer properties used to look up a framebuffer
17struct RenderTargets {
18 constexpr auto operator<=>(const RenderTargets&) const noexcept = default;
19
20 constexpr bool Contains(std::span<const ImageViewId> elements) const noexcept {
21 const auto contains = [elements](ImageViewId item) {
22 return std::ranges::find(elements, item) != elements.end();
23 };
24 return std::ranges::any_of(color_buffer_ids, contains) || contains(depth_buffer_id);
25 }
26
27 std::array<ImageViewId, NUM_RT> color_buffer_ids;
28 ImageViewId depth_buffer_id;
29 std::array<u8, NUM_RT> draw_buffers{};
30 Extent2D size;
31};
32
33} // namespace VideoCommon
34
35namespace std {
36
37template <>
38struct hash<VideoCommon::RenderTargets> {
39 size_t operator()(const VideoCommon::RenderTargets& rt) const noexcept {
40 using VideoCommon::ImageViewId;
41 size_t value = std::hash<ImageViewId>{}(rt.depth_buffer_id);
42 for (const ImageViewId color_buffer_id : rt.color_buffer_ids) {
43 value ^= std::hash<ImageViewId>{}(color_buffer_id);
44 }
45 value ^= Common::BitCast<u64>(rt.draw_buffers);
46 value ^= Common::BitCast<u64>(rt.size);
47 return value;
48 }
49};
50
51} // namespace std
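The std::hash specialization lets backends key their framebuffer caches directly on RenderTargets; a hypothetical sketch, where Framebuffer is a stand-in type:

#include <unordered_map>

struct Framebuffer {}; // stand-in for a backend framebuffer object

// One cached framebuffer per unique combination of attachments, draw buffers
// and render area; the defaulted operator<=> supplies equality, the hash above
// the bucket index.
std::unordered_map<VideoCommon::RenderTargets, Framebuffer> framebuffer_cache;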
diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h
new file mode 100644
index 000000000..04539a43c
--- /dev/null
+++ b/src/video_core/texture_cache/samples_helper.h
@@ -0,0 +1,55 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <utility>
8
9#include "common/assert.h"
10#include "video_core/textures/texture.h"
11
12namespace VideoCommon {
13
14[[nodiscard]] inline std::pair<int, int> SamplesLog2(int num_samples) {
15 switch (num_samples) {
16 case 1:
17 return {0, 0};
18 case 2:
19 return {1, 0};
20 case 4:
21 return {1, 1};
22 case 8:
23 return {2, 1};
24 case 16:
25 return {2, 2};
26 }
27 UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
28 return {1, 1};
29}
30
31[[nodiscard]] inline int NumSamples(Tegra::Texture::MsaaMode msaa_mode) {
32 using Tegra::Texture::MsaaMode;
33 switch (msaa_mode) {
34 case MsaaMode::Msaa1x1:
35 return 1;
36 case MsaaMode::Msaa2x1:
37 case MsaaMode::Msaa2x1_D3D:
38 return 2;
39 case MsaaMode::Msaa2x2:
40 case MsaaMode::Msaa2x2_VC4:
41 case MsaaMode::Msaa2x2_VC12:
42 return 4;
43 case MsaaMode::Msaa4x2:
44 case MsaaMode::Msaa4x2_D3D:
45 case MsaaMode::Msaa4x2_VC8:
46 case MsaaMode::Msaa4x2_VC24:
47 return 8;
48 case MsaaMode::Msaa4x4:
49 return 16;
50 }
51 UNREACHABLE_MSG("Invalid MSAA mode={}", static_cast<int>(msaa_mode));
52 return 1;
53}
54
55} // namespace VideoCommon
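The pair returned by SamplesLog2 is {samples_x_log2, samples_y_log2}, matching the sample grids in the MsaaMode names above; for example:

// Msaa4x2 has a 4-wide, 2-tall sample grid (8 samples), so SamplesLog2(8)
// returns {2, 1}: 1 << 2 = 4 samples wide, 1 << 1 = 2 samples tall.
const auto [x_log2, y_log2] = VideoCommon::SamplesLog2(8); // {2, 1}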
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
new file mode 100644
index 000000000..eae3be6ea
--- /dev/null
+++ b/src/video_core/texture_cache/slot_vector.h
@@ -0,0 +1,156 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <concepts>
9#include <numeric>
10#include <type_traits>
11#include <utility>
12#include <vector>
13
14#include "common/assert.h"
15#include "common/common_types.h"
16
17namespace VideoCommon {
18
19struct SlotId {
20 static constexpr u32 INVALID_INDEX = std::numeric_limits<u32>::max();
21
22 constexpr auto operator<=>(const SlotId&) const noexcept = default;
23
24 constexpr explicit operator bool() const noexcept {
25 return index != INVALID_INDEX;
26 }
27
28 u32 index = INVALID_INDEX;
29};
30
31template <class T>
32requires std::is_nothrow_move_assignable_v<T>&&
33 std::is_nothrow_move_constructible_v<T> class SlotVector {
34public:
35 ~SlotVector() noexcept {
36 size_t index = 0;
37 for (u64 bits : stored_bitset) {
38 for (size_t bit = 0; bits; ++bit, bits >>= 1) {
39 if ((bits & 1) != 0) {
40 values[index + bit].object.~T();
41 }
42 }
43 index += 64;
44 }
45 delete[] values;
46 }
47
48 [[nodiscard]] T& operator[](SlotId id) noexcept {
49 ValidateIndex(id);
50 return values[id.index].object;
51 }
52
53 [[nodiscard]] const T& operator[](SlotId id) const noexcept {
54 ValidateIndex(id);
55 return values[id.index].object;
56 }
57
58 template <typename... Args>
59 [[nodiscard]] SlotId insert(Args&&... args) noexcept {
60 const u32 index = FreeValueIndex();
61 new (&values[index].object) T(std::forward<Args>(args)...);
62 SetStorageBit(index);
63
64 return SlotId{index};
65 }
66
67 void erase(SlotId id) noexcept {
68 values[id.index].object.~T();
69 free_list.push_back(id.index);
70 ResetStorageBit(id.index);
71 }
72
73private:
74 struct NonTrivialDummy {
75 NonTrivialDummy() noexcept {}
76 };
77
78 union Entry {
79 Entry() noexcept : dummy{} {}
80 ~Entry() noexcept {}
81
82 NonTrivialDummy dummy;
83 T object;
84 };
85
86 void SetStorageBit(u32 index) noexcept {
87 stored_bitset[index / 64] |= u64(1) << (index % 64);
88 }
89
90 void ResetStorageBit(u32 index) noexcept {
91 stored_bitset[index / 64] &= ~(u64(1) << (index % 64));
92 }
93
94 bool ReadStorageBit(u32 index) noexcept {
95 return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0;
96 }
97
98 void ValidateIndex(SlotId id) const noexcept {
99 DEBUG_ASSERT(id);
100 DEBUG_ASSERT(id.index / 64 < stored_bitset.size());
101 DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0);
102 }
103
104 [[nodiscard]] u32 FreeValueIndex() noexcept {
105 if (free_list.empty()) {
106 Reserve(values_capacity ? (values_capacity << 1) : 1);
107 }
108 const u32 free_index = free_list.back();
109 free_list.pop_back();
110 return free_index;
111 }
112
113 void Reserve(size_t new_capacity) noexcept {
114 Entry* const new_values = new Entry[new_capacity];
115 size_t index = 0;
116 for (u64 bits : stored_bitset) {
117 for (size_t bit = 0; bits; ++bit, bits >>= 1) {
118 const size_t i = index + bit;
119 if ((bits & 1) == 0) {
120 continue;
121 }
122 T& old_value = values[i].object;
123 new (&new_values[i].object) T(std::move(old_value));
124 old_value.~T();
125 }
126 index += 64;
127 }
128
129 stored_bitset.resize((new_capacity + 63) / 64);
130
131 const size_t old_free_size = free_list.size();
132 free_list.resize(old_free_size + (new_capacity - values_capacity));
133 std::iota(free_list.begin() + old_free_size, free_list.end(),
134 static_cast<u32>(values_capacity));
135
136 delete[] values;
137 values = new_values;
138 values_capacity = new_capacity;
139 }
140
141 Entry* values = nullptr;
142 size_t values_capacity = 0;
143 size_t values_size = 0;
144
145 std::vector<u64> stored_bitset;
146 std::vector<u32> free_list;
147};
148
149} // namespace VideoCommon
150
151template <>
152struct std::hash<VideoCommon::SlotId> {
153 size_t operator()(const VideoCommon::SlotId& id) const noexcept {
154 return std::hash<u32>{}(id.index);
155 }
156};
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
deleted file mode 100644
index efbcf6723..000000000
--- a/src/video_core/texture_cache/surface_base.cpp
+++ /dev/null
@@ -1,299 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/algorithm.h"
6#include "common/assert.h"
7#include "common/common_types.h"
8#include "common/microprofile.h"
9#include "video_core/memory_manager.h"
10#include "video_core/texture_cache/surface_base.h"
11#include "video_core/texture_cache/surface_params.h"
12#include "video_core/textures/convert.h"
13
14namespace VideoCommon {
15
16MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128));
17MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128));
18
19using Tegra::Texture::ConvertFromGuestToHost;
20using VideoCore::MortonSwizzleMode;
21using VideoCore::Surface::IsPixelFormatASTC;
22using VideoCore::Surface::PixelFormat;
23
24StagingCache::StagingCache() = default;
25
26StagingCache::~StagingCache() = default;
27
28SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr_, const SurfaceParams& params_,
29 bool is_astc_supported_)
30 : params{params_}, gpu_addr{gpu_addr_}, mipmap_sizes(params_.num_levels),
31 mipmap_offsets(params.num_levels) {
32 is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported_;
33 host_memory_size = params.GetHostSizeInBytes(is_converted);
34
35 std::size_t offset = 0;
36 for (u32 level = 0; level < params.num_levels; ++level) {
37 const std::size_t mipmap_size{params.GetGuestMipmapSize(level)};
38 mipmap_sizes[level] = mipmap_size;
39 mipmap_offsets[level] = offset;
40 offset += mipmap_size;
41 }
42 layer_size = offset;
43 if (params.is_layered) {
44 if (params.is_tiled) {
45 layer_size =
46 SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth);
47 }
48 guest_memory_size = layer_size * params.depth;
49 } else {
50 guest_memory_size = layer_size;
51 }
52}
53
54MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const {
55 const u32 src_bpp{params.GetBytesPerPixel()};
56 const u32 dst_bpp{rhs.GetBytesPerPixel()};
57 const bool ib1 = params.IsBuffer();
58 const bool ib2 = rhs.IsBuffer();
59 if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) {
60 const bool cb1 = params.IsCompressed();
61 const bool cb2 = rhs.IsCompressed();
62 if (cb1 == cb2) {
63 return MatchTopologyResult::FullMatch;
64 }
65 return MatchTopologyResult::CompressUnmatch;
66 }
67 return MatchTopologyResult::None;
68}
69
70MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const {
71 // Buffer surface Check
72 if (params.IsBuffer()) {
73 const std::size_t wd1 = params.width * params.GetBytesPerPixel();
74 const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel();
75 if (wd1 == wd2) {
76 return MatchStructureResult::FullMatch;
77 }
78 return MatchStructureResult::None;
79 }
80
81 // Linear Surface check
82 if (!params.is_tiled) {
83 if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) {
84 if (params.width == rhs.width) {
85 return MatchStructureResult::FullMatch;
86 } else {
87 return MatchStructureResult::SemiMatch;
88 }
89 }
90 return MatchStructureResult::None;
91 }
92
93 // Tiled Surface check
94 if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth,
95 params.tile_width_spacing, params.num_levels) ==
96 std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth,
97 rhs.tile_width_spacing, rhs.num_levels)) {
98 if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) {
99 return MatchStructureResult::FullMatch;
100 }
101 const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format,
102 rhs.pixel_format);
103 const u32 hs =
104 SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format);
105 const u32 w1 = params.GetBlockAlignedWidth();
106 if (std::tie(w1, params.height) == std::tie(ws, hs)) {
107 return MatchStructureResult::SemiMatch;
108 }
109 }
110 return MatchStructureResult::None;
111}
112
113std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap(
114 const GPUVAddr candidate_gpu_addr) const {
115 if (gpu_addr == candidate_gpu_addr) {
116 return {{0, 0}};
117 }
118
119 if (candidate_gpu_addr < gpu_addr) {
120 return std::nullopt;
121 }
122
123 const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)};
124 const auto layer{static_cast<u32>(relative_address / layer_size)};
125 if (layer >= params.depth) {
126 return std::nullopt;
127 }
128
129 const GPUVAddr mipmap_address = relative_address - layer_size * layer;
130 const auto mipmap_it =
131 Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);
132 if (mipmap_it == mipmap_offsets.end()) {
133 return std::nullopt;
134 }
135
136 const auto level{static_cast<u32>(std::distance(mipmap_offsets.begin(), mipmap_it))};
137 return std::make_pair(layer, level);
138}
139
140std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const {
141 const u32 layers{params.depth};
142 const u32 mipmaps{params.num_levels};
143 std::vector<CopyParams> result;
144 result.reserve(static_cast<std::size_t>(layers) * static_cast<std::size_t>(mipmaps));
145
146 for (u32 layer = 0; layer < layers; layer++) {
147 for (u32 level = 0; level < mipmaps; level++) {
148 const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
149 const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
150 result.emplace_back(0, 0, layer, 0, 0, layer, level, level, width, height, 1);
151 }
152 }
153 return result;
154}
155
156std::vector<CopyParams> SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const {
157 const u32 mipmaps{params.num_levels};
158 std::vector<CopyParams> result;
159 result.reserve(mipmaps);
160
161 for (u32 level = 0; level < mipmaps; level++) {
162 const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
163 const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
164 const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))};
165 result.emplace_back(width, height, depth, level);
166 }
167 return result;
168}
169
170void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory,
171 const SurfaceParams& surface_params, u8* buffer, u32 level) {
172 const u32 width{surface_params.GetMipWidth(level)};
173 const u32 height{surface_params.GetMipHeight(level)};
174 const u32 block_height{surface_params.GetMipBlockHeight(level)};
175 const u32 block_depth{surface_params.GetMipBlockDepth(level)};
176
177 std::size_t guest_offset{mipmap_offsets[level]};
178 if (surface_params.is_layered) {
179 std::size_t host_offset = 0;
180 const std::size_t guest_stride = layer_size;
181 const std::size_t host_stride = surface_params.GetHostLayerSize(level);
182 for (u32 layer = 0; layer < surface_params.depth; ++layer) {
183 MortonSwizzle(mode, surface_params.pixel_format, width, block_height, height,
184 block_depth, 1, surface_params.tile_width_spacing, buffer + host_offset,
185 memory + guest_offset);
186 guest_offset += guest_stride;
187 host_offset += host_stride;
188 }
189 } else {
190 MortonSwizzle(mode, surface_params.pixel_format, width, block_height, height, block_depth,
191 surface_params.GetMipDepth(level), surface_params.tile_width_spacing, buffer,
192 memory + guest_offset);
193 }
194}
195
196void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
197 StagingCache& staging_cache) {
198 MICROPROFILE_SCOPE(GPU_Load_Texture);
199 auto& staging_buffer = staging_cache.GetBuffer(0);
200 u8* host_ptr;
201    // Use an extra temporary buffer
202 auto& tmp_buffer = staging_cache.GetBuffer(1);
203 tmp_buffer.resize(guest_memory_size);
204 host_ptr = tmp_buffer.data();
205 memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
206
207 if (params.is_tiled) {
208 ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
209 params.block_width, static_cast<u32>(params.target));
210 for (u32 level = 0; level < params.num_levels; ++level) {
211 const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
212 SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params,
213 staging_buffer.data() + host_offset, level);
214 }
215 } else {
216 ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented");
217 const u32 bpp{params.GetBytesPerPixel()};
218 const u32 block_width{params.GetDefaultBlockWidth()};
219 const u32 block_height{params.GetDefaultBlockHeight()};
220 const u32 width{(params.width + block_width - 1) / block_width};
221 const u32 height{(params.height + block_height - 1) / block_height};
222 const u32 copy_size{width * bpp};
223 if (params.pitch == copy_size) {
224 std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes(false));
225 } else {
226 const u8* start{host_ptr};
227 u8* write_to{staging_buffer.data()};
228 for (u32 h = height; h > 0; --h) {
229 std::memcpy(write_to, start, copy_size);
230 start += params.pitch;
231 write_to += copy_size;
232 }
233 }
234 }
235
236 if (!is_converted && params.pixel_format != PixelFormat::S8_UINT_D24_UNORM) {
237 return;
238 }
239
240 for (u32 level = params.num_levels; level--;) {
241 const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level, false)};
242 const std::size_t out_host_offset{params.GetHostMipmapLevelOffset(level, is_converted)};
243 u8* const in_buffer = staging_buffer.data() + in_host_offset;
244 u8* const out_buffer = staging_buffer.data() + out_host_offset;
245 ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format,
246 params.GetMipWidth(level), params.GetMipHeight(level),
247 params.GetMipDepth(level), true, true);
248 }
249}
250
251void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
252 StagingCache& staging_cache) {
253 MICROPROFILE_SCOPE(GPU_Flush_Texture);
254 auto& staging_buffer = staging_cache.GetBuffer(0);
255 u8* host_ptr;
256
257    // Use an extra temporary buffer
258 auto& tmp_buffer = staging_cache.GetBuffer(1);
259 tmp_buffer.resize(guest_memory_size);
260 host_ptr = tmp_buffer.data();
261
262 if (params.target == SurfaceTarget::Texture3D) {
263 // Special case for 3D texture segments
264 memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
265 }
266
267 if (params.is_tiled) {
268 ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
269 for (u32 level = 0; level < params.num_levels; ++level) {
270 const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
271 SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params,
272 staging_buffer.data() + host_offset, level);
273 }
274 } else if (params.IsBuffer()) {
275 // Buffers don't have pitch or any fancy layout property. We can just memcpy them to guest
276 // memory.
277 std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
278 } else {
279 ASSERT(params.target == SurfaceTarget::Texture2D);
280 ASSERT(params.num_levels == 1);
281
282 const u32 bpp{params.GetBytesPerPixel()};
283 const u32 copy_size{params.width * bpp};
284 if (params.pitch == copy_size) {
285 std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
286 } else {
287 u8* start{host_ptr};
288 const u8* read_to{staging_buffer.data()};
289 for (u32 h = params.height; h > 0; --h) {
290 std::memcpy(start, read_to, copy_size);
291 start += params.pitch;
292 read_to += copy_size;
293 }
294 }
295 }
296 memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
297}
298
299} // namespace VideoCommon
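LoadBuffer's linear path above de-pitches the guest texture row by row whenever the guest pitch exceeds the packed row size (FlushBuffer does the mirror copy in the other direction). A standalone sketch of that loop, with illustrative parameter names rather than values taken from the code above:

#include <cstddef>
#include <cstring>
#include <vector>

std::vector<unsigned char> DepitchRows(const unsigned char* src, std::size_t height,
                                       std::size_t pitch, std::size_t row_bytes) {
    std::vector<unsigned char> packed(height * row_bytes);
    for (std::size_t row = 0; row < height; ++row) {
        // Guest rows are `pitch` bytes apart; only the first `row_bytes`
        // of each row carry texel data.
        std::memcpy(packed.data() + row * row_bytes, src + row * pitch, row_bytes);
    }
    return packed;
}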
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
deleted file mode 100644
index b57135fe4..000000000
--- a/src/video_core/texture_cache/surface_base.h
+++ /dev/null
@@ -1,333 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <tuple>
9#include <unordered_map>
10#include <vector>
11
12#include "common/common_types.h"
13#include "video_core/gpu.h"
14#include "video_core/morton.h"
15#include "video_core/texture_cache/copy_params.h"
16#include "video_core/texture_cache/surface_params.h"
17#include "video_core/texture_cache/surface_view.h"
18
19namespace Tegra {
20class MemoryManager;
21}
22
23namespace VideoCommon {
24
25using VideoCore::MortonSwizzleMode;
26using VideoCore::Surface::SurfaceTarget;
27
28enum class MatchStructureResult : u32 {
29 FullMatch = 0,
30 SemiMatch = 1,
31 None = 2,
32};
33
34enum class MatchTopologyResult : u32 {
35 FullMatch = 0,
36 CompressUnmatch = 1,
37 None = 2,
38};
39
40class StagingCache {
41public:
42 explicit StagingCache();
43 ~StagingCache();
44
45 std::vector<u8>& GetBuffer(std::size_t index) {
46 return staging_buffer[index];
47 }
48
49 const std::vector<u8>& GetBuffer(std::size_t index) const {
50 return staging_buffer[index];
51 }
52
53 void SetSize(std::size_t size) {
54 staging_buffer.resize(size);
55 }
56
57private:
58 std::vector<std::vector<u8>> staging_buffer;
59};
60
61class SurfaceBaseImpl {
62public:
63 void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);
64
65 void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);
66
67 GPUVAddr GetGpuAddr() const {
68 return gpu_addr;
69 }
70
71 bool Overlaps(const VAddr start, const VAddr end) const {
72 return (cpu_addr < end) && (cpu_addr_end > start);
73 }
74
75 bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) const {
76 const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size;
77 return gpu_addr <= other_start && other_end <= gpu_addr_end;
78 }
79
80 // Use only when recycling a surface
81 void SetGpuAddr(const GPUVAddr new_addr) {
82 gpu_addr = new_addr;
83 }
84
85 VAddr GetCpuAddr() const {
86 return cpu_addr;
87 }
88
89 VAddr GetCpuAddrEnd() const {
90 return cpu_addr_end;
91 }
92
93 void SetCpuAddr(const VAddr new_addr) {
94 cpu_addr = new_addr;
95 cpu_addr_end = new_addr + guest_memory_size;
96 }
97
98 const SurfaceParams& GetSurfaceParams() const {
99 return params;
100 }
101
102 std::size_t GetSizeInBytes() const {
103 return guest_memory_size;
104 }
105
106 std::size_t GetHostSizeInBytes() const {
107 return host_memory_size;
108 }
109
110 std::size_t GetMipmapSize(const u32 level) const {
111 return mipmap_sizes[level];
112 }
113
114 bool IsLinear() const {
115 return !params.is_tiled;
116 }
117
118 bool IsConverted() const {
119 return is_converted;
120 }
121
122 bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const {
123 return params.pixel_format == pixel_format;
124 }
125
126 VideoCore::Surface::PixelFormat GetFormat() const {
127 return params.pixel_format;
128 }
129
130 bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const {
131 return params.target == target;
132 }
133
134 MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const;
135
136 MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const;
137
138 bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const {
139 return std::tie(gpu_addr, params.target, params.num_levels) ==
140 std::tie(other_gpu_addr, rhs.target, rhs.num_levels) &&
141 params.target == SurfaceTarget::Texture2D && params.num_levels == 1;
142 }
143
144 std::optional<std::pair<u32, u32>> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const;
145
146 std::vector<CopyParams> BreakDown(const SurfaceParams& in_params) const {
147 return params.is_layered ? BreakDownLayered(in_params) : BreakDownNonLayered(in_params);
148 }
149
150protected:
151 explicit SurfaceBaseImpl(GPUVAddr gpu_addr_, const SurfaceParams& params_,
152 bool is_astc_supported_);
153 ~SurfaceBaseImpl() = default;
154
155 virtual void DecorateSurfaceName() = 0;
156
157 const SurfaceParams params;
158 std::size_t layer_size;
159 std::size_t guest_memory_size;
160 std::size_t host_memory_size;
161 GPUVAddr gpu_addr{};
162 VAddr cpu_addr{};
163 VAddr cpu_addr_end{};
164 bool is_converted{};
165
166 std::vector<std::size_t> mipmap_sizes;
167 std::vector<std::size_t> mipmap_offsets;
168
169private:
170 void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& surface_params,
171 u8* buffer, u32 level);
172
173 std::vector<CopyParams> BreakDownLayered(const SurfaceParams& in_params) const;
174
175 std::vector<CopyParams> BreakDownNonLayered(const SurfaceParams& in_params) const;
176};
177
178template <typename TView>
179class SurfaceBase : public SurfaceBaseImpl {
180public:
181 virtual void UploadTexture(const std::vector<u8>& staging_buffer) = 0;
182
183 virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0;
184
185 void MarkAsModified(bool is_modified_, u64 tick) {
186 is_modified = is_modified_ || is_target;
187 modification_tick = tick;
188 }
189
190 void MarkAsRenderTarget(bool is_target_, u32 index_) {
191 is_target = is_target_;
192 index = index_;
193 }
194
195 void SetMemoryMarked(bool is_memory_marked_) {
196 is_memory_marked = is_memory_marked_;
197 }
198
199 bool IsMemoryMarked() const {
200 return is_memory_marked;
201 }
202
203 void SetSyncPending(bool is_sync_pending_) {
204 is_sync_pending = is_sync_pending_;
205 }
206
207 bool IsSyncPending() const {
208 return is_sync_pending;
209 }
210
211 void MarkAsPicked(bool is_picked_) {
212 is_picked = is_picked_;
213 }
214
215 bool IsModified() const {
216 return is_modified;
217 }
218
219 bool IsProtected() const {
220 // Only 3D slices are to be protected
221 return is_target && params.target == SurfaceTarget::Texture3D;
222 }
223
224 bool IsRenderTarget() const {
225 return is_target;
226 }
227
228 u32 GetRenderTarget() const {
229 return index;
230 }
231
232 bool IsRegistered() const {
233 return is_registered;
234 }
235
236 bool IsPicked() const {
237 return is_picked;
238 }
239
240 void MarkAsRegistered(bool is_reg) {
241 is_registered = is_reg;
242 }
243
244 u64 GetModificationTick() const {
245 return modification_tick;
246 }
247
248 TView EmplaceOverview(const SurfaceParams& overview_params) {
249 const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth};
250 return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
251 }
252
253 TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) {
254 return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth,
255 base_level, num_levels));
256 }
257
258 std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params,
259 const GPUVAddr view_addr,
260 const std::size_t candidate_size, const u32 mipmap,
261 const u32 layer) {
262 const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)};
263 if (!layer_mipmap) {
264 return {};
265 }
266 const auto [end_layer, end_mipmap] = *layer_mipmap;
267 if (layer != end_layer) {
268 if (mipmap == 0 && end_mipmap == 0) {
269 return GetView(ViewParams(view_params.target, layer, end_layer - layer, 0, 1));
270 }
271 return {};
272 } else {
273 return GetView(ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap));
274 }
275 }
276
277 std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
278 const std::size_t candidate_size) {
279 if (params.target == SurfaceTarget::Texture3D ||
280 view_params.target == SurfaceTarget::Texture3D ||
281 (params.num_levels == 1 && !params.is_layered)) {
282 return {};
283 }
284 const auto layer_mipmap{GetLayerMipmap(view_addr)};
285 if (!layer_mipmap) {
286 return {};
287 }
288 const auto [layer, mipmap] = *layer_mipmap;
289 if (GetMipmapSize(mipmap) != candidate_size) {
290 return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer);
291 }
292 return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1));
293 }
294
295 TView GetMainView() const {
296 return main_view;
297 }
298
299protected:
300 explicit SurfaceBase(const GPUVAddr gpu_addr_, const SurfaceParams& params_,
301 bool is_astc_supported_)
302 : SurfaceBaseImpl{gpu_addr_, params_, is_astc_supported_} {}
303
304 ~SurfaceBase() = default;
305
306 virtual TView CreateView(const ViewParams& view_key) = 0;
307
308 TView main_view;
309 std::unordered_map<ViewParams, TView> views;
310
311private:
312 TView GetView(const ViewParams& key) {
313 const auto [entry, is_cache_miss] = views.try_emplace(key);
314 auto& view{entry->second};
315 if (is_cache_miss) {
316 view = CreateView(key);
317 }
318 return view;
319 }
320
321 static constexpr u32 NO_RT = 0xFFFFFFFF;
322
323 bool is_modified{};
324 bool is_target{};
325 bool is_registered{};
326 bool is_picked{};
327 bool is_memory_marked{};
328 bool is_sync_pending{};
329 u32 index{NO_RT};
330 u64 modification_tick{};
331};
332
333} // namespace VideoCommon
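SurfaceBase::GetView above memoizes view creation with try_emplace: a cache miss default-constructs the mapped value and only then pays for CreateView. The same pattern in isolation (the names here are illustrative, not part of the cache):

#include <unordered_map>

template <typename Key, typename Value, typename Factory>
Value& Memoize(std::unordered_map<Key, Value>& cache, const Key& key, Factory&& factory) {
    const auto [it, inserted] = cache.try_emplace(key);
    if (inserted) {
        it->second = factory(key); // Creation cost is paid only on a miss
    }
    return it->second;
}

As in GetView, this requires the mapped type to be cheaply default-constructible, since try_emplace builds a placeholder before the factory runs.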
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
deleted file mode 100644
index 96f93246d..000000000
--- a/src/video_core/texture_cache/surface_params.cpp
+++ /dev/null
@@ -1,445 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <string>
7#include <tuple>
8
9#include "common/alignment.h"
10#include "common/bit_util.h"
11#include "core/core.h"
12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/surface.h"
14#include "video_core/texture_cache/format_lookup_table.h"
15#include "video_core/texture_cache/surface_params.h"
16
17namespace VideoCommon {
18
19using VideoCore::Surface::PixelFormat;
20using VideoCore::Surface::PixelFormatFromDepthFormat;
21using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
22using VideoCore::Surface::SurfaceTarget;
23using VideoCore::Surface::SurfaceTargetFromTextureType;
24using VideoCore::Surface::SurfaceType;
25
26namespace {
27
28SurfaceTarget TextureTypeToSurfaceTarget(Tegra::Shader::TextureType type, bool is_array) {
29 switch (type) {
30 case Tegra::Shader::TextureType::Texture1D:
31 return is_array ? SurfaceTarget::Texture1DArray : SurfaceTarget::Texture1D;
32 case Tegra::Shader::TextureType::Texture2D:
33 return is_array ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
34 case Tegra::Shader::TextureType::Texture3D:
35 ASSERT(!is_array);
36 return SurfaceTarget::Texture3D;
37 case Tegra::Shader::TextureType::TextureCube:
38 return is_array ? SurfaceTarget::TextureCubeArray : SurfaceTarget::TextureCubemap;
39 default:
40 UNREACHABLE();
41 return SurfaceTarget::Texture2D;
42 }
43}
44
45SurfaceTarget ImageTypeToSurfaceTarget(Tegra::Shader::ImageType type) {
46 switch (type) {
47 case Tegra::Shader::ImageType::Texture1D:
48 return SurfaceTarget::Texture1D;
49 case Tegra::Shader::ImageType::TextureBuffer:
50 return SurfaceTarget::TextureBuffer;
51 case Tegra::Shader::ImageType::Texture1DArray:
52 return SurfaceTarget::Texture1DArray;
53 case Tegra::Shader::ImageType::Texture2D:
54 return SurfaceTarget::Texture2D;
55 case Tegra::Shader::ImageType::Texture2DArray:
56 return SurfaceTarget::Texture2DArray;
57 case Tegra::Shader::ImageType::Texture3D:
58 return SurfaceTarget::Texture3D;
59 default:
60 UNREACHABLE();
61 return SurfaceTarget::Texture2D;
62 }
63}
64
65constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) {
66 return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile);
67}
68
69} // Anonymous namespace
70
71SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_table,
72 const Tegra::Texture::TICEntry& tic,
73 const VideoCommon::Shader::Sampler& entry) {
74 SurfaceParams params;
75 params.is_tiled = tic.IsTiled();
76 params.srgb_conversion = tic.IsSrgbConversionEnabled();
77 params.block_width = params.is_tiled ? tic.BlockWidth() : 0;
78 params.block_height = params.is_tiled ? tic.BlockHeight() : 0;
79 params.block_depth = params.is_tiled ? tic.BlockDepth() : 0;
80 params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
81 params.pixel_format = lookup_table.GetPixelFormat(
82 tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
83 params.type = GetFormatType(params.pixel_format);
84 if (entry.is_shadow && params.type == SurfaceType::ColorTexture) {
85 switch (params.pixel_format) {
86 case PixelFormat::R16_UNORM:
87 case PixelFormat::R16_FLOAT:
88 params.pixel_format = PixelFormat::D16_UNORM;
89 break;
90 case PixelFormat::R32_FLOAT:
91 params.pixel_format = PixelFormat::D32_FLOAT;
92 break;
93 default:
94 UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}",
95 static_cast<u32>(params.pixel_format));
96 }
97 params.type = GetFormatType(params.pixel_format);
98 }
99    // TODO: On 1D buffers we should use the TIC info.
100 if (tic.IsBuffer()) {
101 params.target = SurfaceTarget::TextureBuffer;
102 params.width = tic.Width();
103 params.pitch = params.width * params.GetBytesPerPixel();
104 params.height = 1;
105 params.depth = 1;
106 params.num_levels = 1;
107 params.emulated_levels = 1;
108 params.is_layered = false;
109 } else {
110 params.target = TextureTypeToSurfaceTarget(entry.type, entry.is_array);
111 params.width = tic.Width();
112 params.height = tic.Height();
113 params.depth = tic.Depth();
114 params.pitch = params.is_tiled ? 0 : tic.Pitch();
115 if (params.target == SurfaceTarget::TextureCubemap ||
116 params.target == SurfaceTarget::TextureCubeArray) {
117 params.depth *= 6;
118 }
119 params.num_levels = tic.max_mip_level + 1;
120 params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap());
121 params.is_layered = params.IsLayered();
122 }
123 return params;
124}
125
126SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_table,
127 const Tegra::Texture::TICEntry& tic,
128 const VideoCommon::Shader::Image& entry) {
129 SurfaceParams params;
130 params.is_tiled = tic.IsTiled();
131 params.srgb_conversion = tic.IsSrgbConversionEnabled();
132 params.block_width = params.is_tiled ? tic.BlockWidth() : 0;
133 params.block_height = params.is_tiled ? tic.BlockHeight() : 0;
134 params.block_depth = params.is_tiled ? tic.BlockDepth() : 0;
135 params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
136 params.pixel_format = lookup_table.GetPixelFormat(
137 tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
138 params.type = GetFormatType(params.pixel_format);
139 params.target = ImageTypeToSurfaceTarget(entry.type);
140    // TODO: On 1D buffers we should use the TIC info.
141 if (tic.IsBuffer()) {
142 params.target = SurfaceTarget::TextureBuffer;
143 params.width = tic.Width();
144 params.pitch = params.width * params.GetBytesPerPixel();
145 params.height = 1;
146 params.depth = 1;
147 params.num_levels = 1;
148 params.emulated_levels = 1;
149 params.is_layered = false;
150 } else {
151 params.width = tic.Width();
152 params.height = tic.Height();
153 params.depth = tic.Depth();
154 params.pitch = params.is_tiled ? 0 : tic.Pitch();
155 if (params.target == SurfaceTarget::TextureCubemap ||
156 params.target == SurfaceTarget::TextureCubeArray) {
157 params.depth *= 6;
158 }
159 params.num_levels = tic.max_mip_level + 1;
160 params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap());
161 params.is_layered = params.IsLayered();
162 }
163 return params;
164}
165
166SurfaceParams SurfaceParams::CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d) {
167 const auto& regs = maxwell3d.regs;
168 const auto block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U);
169 const bool is_layered = regs.zeta_layers > 1 && block_depth == 0;
170 const auto pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
171 return {
172 .is_tiled = regs.zeta.memory_layout.type ==
173 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear,
174 .srgb_conversion = false,
175 .is_layered = is_layered,
176 .block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U),
177 .block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U),
178 .block_depth = block_depth,
179 .tile_width_spacing = 1,
180 .width = regs.zeta_width,
181 .height = regs.zeta_height,
182 .depth = is_layered ? regs.zeta_layers.Value() : 1U,
183 .pitch = 0,
184 .num_levels = 1,
185 .emulated_levels = 1,
186 .pixel_format = pixel_format,
187 .type = GetFormatType(pixel_format),
188 .target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D,
189 };
190}
191
192SurfaceParams SurfaceParams::CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d,
193 std::size_t index) {
194 const auto& config{maxwell3d.regs.rt[index]};
195 SurfaceParams params;
196 params.is_tiled =
197 config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
198 params.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
199 config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB;
200 params.block_width = config.memory_layout.block_width;
201 params.block_height = config.memory_layout.block_height;
202 params.block_depth = config.memory_layout.block_depth;
203 params.tile_width_spacing = 1;
204 params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
205 params.type = GetFormatType(params.pixel_format);
206 if (params.is_tiled) {
207 params.pitch = 0;
208 params.width = config.width;
209 } else {
210 const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT;
211 params.pitch = config.width;
212 params.width = params.pitch / bpp;
213 }
214 params.height = config.height;
215 params.num_levels = 1;
216 params.emulated_levels = 1;
217
218 if (config.memory_layout.is_3d != 0) {
219 params.depth = config.layers.Value();
220 params.is_layered = false;
221 params.target = SurfaceTarget::Texture3D;
222 } else if (config.layers > 1) {
223 params.depth = config.layers.Value();
224 params.is_layered = true;
225 params.target = SurfaceTarget::Texture2DArray;
226 } else {
227 params.depth = 1;
228 params.is_layered = false;
229 params.target = SurfaceTarget::Texture2D;
230 }
231 return params;
232}
233
234SurfaceParams SurfaceParams::CreateForFermiCopySurface(
235 const Tegra::Engines::Fermi2D::Regs::Surface& config) {
236 const bool is_tiled = !config.linear;
237 const auto pixel_format = PixelFormatFromRenderTargetFormat(config.format);
238
239 SurfaceParams params{
240 .is_tiled = is_tiled,
241 .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
242 config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB,
243 .is_layered = false,
244 .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U,
245 .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U,
246 .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U,
247 .tile_width_spacing = 1,
248 .width = config.width,
249 .height = config.height,
250 .depth = 1,
251 .pitch = config.pitch,
252 .num_levels = 1,
253 .emulated_levels = 1,
254 .pixel_format = pixel_format,
255 .type = GetFormatType(pixel_format),
256 // TODO(Rodrigo): Try to guess texture arrays from parameters
257 .target = SurfaceTarget::Texture2D,
258 };
259
260 params.is_layered = params.IsLayered();
261 return params;
262}
263
264VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget(
265 const VideoCommon::Shader::Sampler& entry) {
266 return TextureTypeToSurfaceTarget(entry.type, entry.is_array);
267}
268
269VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget(
270 const VideoCommon::Shader::Image& entry) {
271 return ImageTypeToSurfaceTarget(entry.type);
272}
273
274bool SurfaceParams::IsLayered() const {
275 switch (target) {
276 case SurfaceTarget::Texture1DArray:
277 case SurfaceTarget::Texture2DArray:
278 case SurfaceTarget::TextureCubemap:
279 case SurfaceTarget::TextureCubeArray:
280 return true;
281 default:
282 return false;
283 }
284}
285
286// Auto block resizing algorithm from:
287// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
288u32 SurfaceParams::GetMipBlockHeight(u32 level) const {
289 if (level == 0) {
290 return this->block_height;
291 }
292
293 const u32 height_new{GetMipHeight(level)};
294 const u32 default_block_height{GetDefaultBlockHeight()};
295 const u32 blocks_in_y{(height_new + default_block_height - 1) / default_block_height};
296 const u32 block_height_new = Common::Log2Ceil32(blocks_in_y);
297 return std::clamp(block_height_new, 3U, 7U) - 3U;
298}
299
300u32 SurfaceParams::GetMipBlockDepth(u32 level) const {
301 if (level == 0) {
302 return this->block_depth;
303 }
304 if (is_layered) {
305 return 0;
306 }
307
308 const u32 depth_new{GetMipDepth(level)};
309 const u32 block_depth_new = Common::Log2Ceil32(depth_new);
310 if (block_depth_new > 4) {
311 return 5 - (GetMipBlockHeight(level) >= 2);
312 }
313 return block_depth_new;
314}
315
316std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
317 std::size_t offset = 0;
318 for (u32 i = 0; i < level; i++) {
319 offset += GetInnerMipmapMemorySize(i, false, false);
320 }
321 return offset;
322}
323
324std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level, bool is_converted) const {
325 std::size_t offset = 0;
326 if (is_converted) {
327 for (u32 i = 0; i < level; ++i) {
328 offset += GetConvertedMipmapSize(i) * GetNumLayers();
329 }
330 } else {
331 for (u32 i = 0; i < level; ++i) {
332 offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers();
333 }
334 }
335 return offset;
336}
337
338std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const {
339 constexpr std::size_t rgba8_bpp = 4ULL;
340 const std::size_t mip_width = GetMipWidth(level);
341 const std::size_t mip_height = GetMipHeight(level);
342 const std::size_t mip_depth = is_layered ? 1 : GetMipDepth(level);
343 return mip_width * mip_height * mip_depth * rgba8_bpp;
344}
345
346std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const {
347 std::size_t size = 0;
348 for (u32 level = 0; level < num_levels; ++level) {
349 size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed);
350 }
351 if (is_tiled && is_layered) {
352 return Common::AlignBits(size, Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth);
353 }
354 return size;
355}
356
357std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
358 bool uncompressed) const {
359 const u32 mip_width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
360 const u32 mip_height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
361 const u32 mip_depth{is_layered ? 1U : GetMipDepth(level)};
362 if (is_tiled) {
363 return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), mip_width,
364 mip_height, mip_depth, GetMipBlockHeight(level),
365 GetMipBlockDepth(level));
366 } else if (as_host_size || IsBuffer()) {
367 return GetBytesPerPixel() * mip_width * mip_height * mip_depth;
368 } else {
369 // Linear Texture Case
370 return pitch * mip_height * mip_depth;
371 }
372}
373
374bool SurfaceParams::operator==(const SurfaceParams& rhs) const {
375 return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width,
376 height, depth, pitch, num_levels, pixel_format, type, target) ==
377 std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth,
378 rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch,
379 rhs.num_levels, rhs.pixel_format, rhs.type, rhs.target);
380}
381
382std::string SurfaceParams::TargetName() const {
383 switch (target) {
384 case SurfaceTarget::Texture1D:
385 return "1D";
386 case SurfaceTarget::TextureBuffer:
387 return "TexBuffer";
388 case SurfaceTarget::Texture2D:
389 return "2D";
390 case SurfaceTarget::Texture3D:
391 return "3D";
392 case SurfaceTarget::Texture1DArray:
393 return "1DArray";
394 case SurfaceTarget::Texture2DArray:
395 return "2DArray";
396 case SurfaceTarget::TextureCubemap:
397 return "Cube";
398 case SurfaceTarget::TextureCubeArray:
399 return "CubeArray";
400 default:
401 LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", target);
402 UNREACHABLE();
403 return fmt::format("TUK({})", target);
404 }
405}
406
407u32 SurfaceParams::GetBlockSize() const {
408 const u32 x = 64U << block_width;
409 const u32 y = 8U << block_height;
410 const u32 z = 1U << block_depth;
411 return x * y * z;
412}
413
414std::pair<u32, u32> SurfaceParams::GetBlockXY() const {
415 const u32 x_pixels = 64U / GetBytesPerPixel();
416 const u32 x = x_pixels << block_width;
417 const u32 y = 8U << block_height;
418 return {x, y};
419}
420
421std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const {
422 const auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
423 const u32 block_size = GetBlockSize();
424 const u32 block_index = offset / block_size;
425 const u32 gob_offset = offset % block_size;
426 const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GOB_SIZE);
427 const u32 x_gob_pixels = 64U / GetBytesPerPixel();
428 const u32 x_block_pixels = x_gob_pixels << block_width;
429 const u32 y_block_pixels = 8U << block_height;
430 const u32 z_block_pixels = 1U << block_depth;
431 const u32 x_blocks = div_ceil(width, x_block_pixels);
432 const u32 y_blocks = div_ceil(height, y_block_pixels);
433 const u32 z_blocks = div_ceil(depth, z_block_pixels);
434 const u32 base_x = block_index % x_blocks;
435 const u32 base_y = (block_index / x_blocks) % y_blocks;
436 const u32 base_z = (block_index / (x_blocks * y_blocks)) % z_blocks;
437 u32 x = base_x * x_block_pixels;
438 u32 y = base_y * y_block_pixels;
439 u32 z = base_z * z_block_pixels;
440 z += gob_index >> block_height;
441 y += (gob_index * 8U) % y_block_pixels;
442 return {x, y, z};
443}
444
445} // namespace VideoCommon
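GetMipBlockHeight above follows the nv50 auto block-resizing rule cited in the comment: count the block rows a mip occupies, take the ceiling log2, and clamp the exponent to the 3..7 range before rebasing it to 0..4 (one to sixteen 8-line GOBs). A standalone transcription with a worked value (Log2Ceil stands in for Common::Log2Ceil32):

#include <algorithm>
#include <cstdint>

std::uint32_t Log2Ceil(std::uint32_t value) {
    std::uint32_t log = 0;
    while ((std::uint32_t{1} << log) < value) {
        ++log;
    }
    return log;
}

std::uint32_t MipBlockHeight(std::uint32_t mip_height, std::uint32_t default_block_height) {
    const std::uint32_t blocks_in_y =
        (mip_height + default_block_height - 1) / default_block_height;
    // Clamp to the 8..128-line block range, rebased so 0 means a single GOB
    return std::clamp(Log2Ceil(blocks_in_y), std::uint32_t{3}, std::uint32_t{7}) - 3;
}

// A 128-texel-high mip of an uncompressed format has blocks_in_y = 128,
// Log2Ceil(128) = 7, so the stored exponent is 7 - 3 = 4 (16 GOBs per block).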
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
deleted file mode 100644
index 4466c3c34..000000000
--- a/src/video_core/texture_cache/surface_params.h
+++ /dev/null
@@ -1,294 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <utility>
8
9#include "common/alignment.h"
10#include "common/bit_util.h"
11#include "common/cityhash.h"
12#include "common/common_types.h"
13#include "video_core/engines/fermi_2d.h"
14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/shader/shader_ir.h"
16#include "video_core/surface.h"
17#include "video_core/textures/decoders.h"
18
19namespace VideoCommon {
20
21class FormatLookupTable;
22
23class SurfaceParams {
24public:
25 /// Creates SurfaceCachedParams from a texture configuration.
26 static SurfaceParams CreateForTexture(const FormatLookupTable& lookup_table,
27 const Tegra::Texture::TICEntry& tic,
28 const VideoCommon::Shader::Sampler& entry);
29
30 /// Creates SurfaceCachedParams from an image configuration.
31 static SurfaceParams CreateForImage(const FormatLookupTable& lookup_table,
32 const Tegra::Texture::TICEntry& tic,
33 const VideoCommon::Shader::Image& entry);
34
35 /// Creates SurfaceCachedParams for a depth buffer configuration.
36 static SurfaceParams CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d);
37
38 /// Creates SurfaceCachedParams from a framebuffer configuration.
39 static SurfaceParams CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d,
40 std::size_t index);
41
42 /// Creates SurfaceCachedParams from a Fermi2D surface configuration.
43 static SurfaceParams CreateForFermiCopySurface(
44 const Tegra::Engines::Fermi2D::Regs::Surface& config);
45
46 /// Obtains the texture target from a shader's sampler entry.
47 static VideoCore::Surface::SurfaceTarget ExpectedTarget(
48 const VideoCommon::Shader::Sampler& entry);
49
50 /// Obtains the texture target from a shader's sampler entry.
51 static VideoCore::Surface::SurfaceTarget ExpectedTarget(
52 const VideoCommon::Shader::Image& entry);
53
54 std::size_t Hash() const {
55 return static_cast<std::size_t>(
56 Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
57 }
58
59 bool operator==(const SurfaceParams& rhs) const;
60
61 bool operator!=(const SurfaceParams& rhs) const {
62 return !operator==(rhs);
63 }
64
65 std::size_t GetGuestSizeInBytes() const {
66 return GetInnerMemorySize(false, false, false);
67 }
68
69 std::size_t GetHostSizeInBytes(bool is_converted) const {
70 if (!is_converted) {
71 return GetInnerMemorySize(true, false, false);
72 }
73        // ASTC is decompressed in software and emulated as RGBA8
74 std::size_t host_size_in_bytes = 0;
75 for (u32 level = 0; level < num_levels; ++level) {
76 host_size_in_bytes += GetConvertedMipmapSize(level) * GetNumLayers();
77 }
78 return host_size_in_bytes;
79 }
80
81 u32 GetBlockAlignedWidth() const {
82 return Common::AlignUp(width, 64 / GetBytesPerPixel());
83 }
84
85 /// Returns the width of a given mipmap level.
86 u32 GetMipWidth(u32 level) const {
87 return std::max(1U, width >> level);
88 }
89
90 /// Returns the height of a given mipmap level.
91 u32 GetMipHeight(u32 level) const {
92 return std::max(1U, height >> level);
93 }
94
95 /// Returns the depth of a given mipmap level.
96 u32 GetMipDepth(u32 level) const {
97 return is_layered ? depth : std::max(1U, depth >> level);
98 }
99
100 /// Returns the block height of a given mipmap level.
101 u32 GetMipBlockHeight(u32 level) const;
102
103 /// Returns the block depth of a given mipmap level.
104 u32 GetMipBlockDepth(u32 level) const;
105
106 /// Returns the best possible row/pitch alignment for the surface.
107 u32 GetRowAlignment(u32 level, bool is_converted) const {
108 const u32 bpp = is_converted ? 4 : GetBytesPerPixel();
109 return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp);
110 }
111
112 /// Returns the offset in bytes in guest memory of a given mipmap level.
113 std::size_t GetGuestMipmapLevelOffset(u32 level) const;
114
115 /// Returns the offset in bytes in host memory (linear) of a given mipmap level.
116 std::size_t GetHostMipmapLevelOffset(u32 level, bool is_converted) const;
117
118 /// Returns the size in bytes in guest memory of a given mipmap level.
119 std::size_t GetGuestMipmapSize(u32 level) const {
120 return GetInnerMipmapMemorySize(level, false, false);
121 }
122
123 /// Returns the size in bytes in host memory (linear) of a given mipmap level.
124 std::size_t GetHostMipmapSize(u32 level) const {
125 return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers();
126 }
127
128 std::size_t GetConvertedMipmapSize(u32 level) const;
129
130    /// Gets this texture's Tegra block size in the guest memory layout
131 u32 GetBlockSize() const;
132
133 /// Get X, Y coordinates max sizes of a single block.
134 std::pair<u32, u32> GetBlockXY() const;
135
136 /// Get the offset in x, y, z coordinates from a memory offset
137 std::tuple<u32, u32, u32> GetBlockOffsetXYZ(u32 offset) const;
138
139 /// Returns the size of a layer in bytes in guest memory.
140 std::size_t GetGuestLayerSize() const {
141 return GetLayerSize(false, false);
142 }
143
144 /// Returns the size of a layer in bytes in host memory for a given mipmap level.
145 std::size_t GetHostLayerSize(u32 level) const {
146 ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D);
147 return GetInnerMipmapMemorySize(level, true, false);
148 }
149
150    /// Returns the maximum number of mipmap levels the texture can have on the host GPU
151 u32 MaxPossibleMipmap() const {
152 const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U;
153 const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U;
154 const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h);
155 if (target != VideoCore::Surface::SurfaceTarget::Texture3D)
156 return max_mipmap;
157 return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U);
158 }
159
160 /// Returns if the guest surface is a compressed surface.
161 bool IsCompressed() const {
162 return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1;
163 }
164
165 /// Returns the default block width.
166 u32 GetDefaultBlockWidth() const {
167 return VideoCore::Surface::GetDefaultBlockWidth(pixel_format);
168 }
169
170 /// Returns the default block height.
171 u32 GetDefaultBlockHeight() const {
172 return VideoCore::Surface::GetDefaultBlockHeight(pixel_format);
173 }
174
175 /// Returns the bits per pixel.
176 u32 GetBitsPerPixel() const {
177 return VideoCore::Surface::GetFormatBpp(pixel_format);
178 }
179
180 /// Returns the bytes per pixel.
181 u32 GetBytesPerPixel() const {
182 return VideoCore::Surface::GetBytesPerPixel(pixel_format);
183 }
184
185 /// Returns true if the pixel format is a depth and/or stencil format.
186 bool IsPixelFormatZeta() const {
187 return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat &&
188 pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat;
189 }
190
191    /// Returns true if the surface is a TextureBuffer type of surface.
192 bool IsBuffer() const {
193 return target == VideoCore::Surface::SurfaceTarget::TextureBuffer;
194 }
195
196 /// Returns the number of layers in the surface.
197 std::size_t GetNumLayers() const {
198 return is_layered ? depth : 1;
199 }
200
201 /// Returns the debug name of the texture for use in graphic debuggers.
202 std::string TargetName() const;
203
204    // Helper used for out-of-class size calculations
205 static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height,
206 const u32 block_depth) {
207 return Common::AlignBits(out_size,
208 Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth);
209 }
210
211    /// Converts a width from one surface type to another. This helps represent the
212    /// equivalent value between compressed and non-compressed textures.
213 static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from,
214 VideoCore::Surface::PixelFormat pixel_format_to) {
215 const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from);
216 const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to);
217 return (width * bw2 + bw1 - 1) / bw1;
218 }
219
220    /// Converts a height from one surface type to another. This helps represent the
221    /// equivalent value between compressed and non-compressed textures.
222 static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from,
223 VideoCore::Surface::PixelFormat pixel_format_to) {
224 const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from);
225 const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to);
226 return (height * bh2 + bh1 - 1) / bh1;
227 }
228
229    // Finds the maximum possible width between two 2D layers of different formats
230 static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params,
231 const u32 src_level, const u32 dst_level) {
232 const u32 bw1 = src_params.GetDefaultBlockWidth();
233 const u32 bw2 = dst_params.GetDefaultBlockWidth();
234 const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1;
235 const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2;
236 return std::min(t_src_width, t_dst_width);
237 }
238
239    // Finds the maximum possible height between two 2D layers of different formats
240 static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params,
241 const u32 src_level, const u32 dst_level) {
242 const u32 bh1 = src_params.GetDefaultBlockHeight();
243 const u32 bh2 = dst_params.GetDefaultBlockHeight();
244 const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1;
245 const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2;
246 return std::min(t_src_height, t_dst_height);
247 }
248
249 bool is_tiled;
250 bool srgb_conversion;
251 bool is_layered;
252 u32 block_width;
253 u32 block_height;
254 u32 block_depth;
255 u32 tile_width_spacing;
256 u32 width;
257 u32 height;
258 u32 depth;
259 u32 pitch;
260 u32 num_levels;
261 u32 emulated_levels;
262 VideoCore::Surface::PixelFormat pixel_format;
263 VideoCore::Surface::SurfaceType type;
264 VideoCore::Surface::SurfaceTarget target;
265
266private:
267 /// Returns the size of a given mipmap level inside a layer.
268 std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const;
269
270 /// Returns the size of all mipmap levels and aligns as needed.
271 std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const {
272 return GetLayerSize(as_host_size, uncompressed) *
273 (layer_only ? 1U : (is_layered ? depth : 1U));
274 }
275
276 /// Returns the size of a layer
277 std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const;
278
279 /// Returns true if these parameters are from a layered surface.
280 bool IsLayered() const;
281};
282
283} // namespace VideoCommon
284
285namespace std {
286
287template <>
288struct hash<VideoCommon::SurfaceParams> {
289 std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept {
290 return k.Hash();
291 }
292};
293
294} // namespace std
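ConvertWidth and ConvertHeight above rescale an extent from one format's block grid to another's, rounding up; IntersectWidth and IntersectHeight then take the minimum of the two cross-converted extents. The arithmetic in isolation, with a worked value (the 4-wide versus 1-wide block sizes are generic examples, not tied to a specific PixelFormat):

#include <cstdint>

std::uint32_t ConvertExtent(std::uint32_t extent, std::uint32_t block_from,
                            std::uint32_t block_to) {
    // Rescale `extent`, measured against `block_from`-sized blocks, into the
    // `block_to` grid, rounding up so partial blocks are not dropped.
    return (extent * block_to + block_from - 1) / block_from;
}

// A 128-texel-wide BC-compressed surface (4-wide blocks) aliased as RGBA8
// (1-wide blocks) maps to ConvertExtent(128, 4, 1) == 32 texels, one per block.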
diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp
deleted file mode 100644
index 6b5f5984b..000000000
--- a/src/video_core/texture_cache/surface_view.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <tuple>
6
7#include "common/common_types.h"
8#include "video_core/texture_cache/surface_view.h"
9
10namespace VideoCommon {
11
12std::size_t ViewParams::Hash() const {
13 return static_cast<std::size_t>(base_layer) ^ (static_cast<std::size_t>(num_layers) << 16) ^
14 (static_cast<std::size_t>(base_level) << 24) ^
15 (static_cast<std::size_t>(num_levels) << 32) ^ (static_cast<std::size_t>(target) << 36);
16}
17
18bool ViewParams::operator==(const ViewParams& rhs) const {
19 return std::tie(base_layer, num_layers, base_level, num_levels, target) ==
20 std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target);
21}
22
23bool ViewParams::operator!=(const ViewParams& rhs) const {
24 return !operator==(rhs);
25}
26
27} // namespace VideoCommon
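ViewParams::Hash above packs the five view fields into one word with shifted XORs. The 32- and 36-bit shifts only keep num_levels and target in distinct bits when size_t is 64 bits wide (shifting a 32-bit size_t by 32 would be undefined), which is fine for yuzu's 64-bit targets. The same packing written against an explicit 64-bit type (the function name is illustrative):

#include <cstdint>

std::uint64_t PackViewKey(std::uint32_t base_layer, std::uint32_t num_layers,
                          std::uint32_t base_level, std::uint32_t num_levels,
                          std::uint32_t target) {
    return static_cast<std::uint64_t>(base_layer) ^
           (static_cast<std::uint64_t>(num_layers) << 16) ^
           (static_cast<std::uint64_t>(base_level) << 24) ^
           (static_cast<std::uint64_t>(num_levels) << 32) ^
           (static_cast<std::uint64_t>(target) << 36);
}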
diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h
deleted file mode 100644
index 199f72732..000000000
--- a/src/video_core/texture_cache/surface_view.h
+++ /dev/null
@@ -1,68 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <functional>
8
9#include "common/common_types.h"
10#include "video_core/surface.h"
11#include "video_core/texture_cache/surface_params.h"
12
13namespace VideoCommon {
14
15struct ViewParams {
16 constexpr explicit ViewParams(VideoCore::Surface::SurfaceTarget target_, u32 base_layer_,
17 u32 num_layers_, u32 base_level_, u32 num_levels_)
18 : target{target_}, base_layer{base_layer_}, num_layers{num_layers_},
19 base_level{base_level_}, num_levels{num_levels_} {}
20
21 std::size_t Hash() const;
22
23 bool operator==(const ViewParams& rhs) const;
24 bool operator!=(const ViewParams& rhs) const;
25
26 bool IsLayered() const {
27 switch (target) {
28 case VideoCore::Surface::SurfaceTarget::Texture1DArray:
29 case VideoCore::Surface::SurfaceTarget::Texture2DArray:
30 case VideoCore::Surface::SurfaceTarget::TextureCubemap:
31 case VideoCore::Surface::SurfaceTarget::TextureCubeArray:
32 return true;
33 default:
34 return false;
35 }
36 }
37
38 VideoCore::Surface::SurfaceTarget target{};
39 u32 base_layer{};
40 u32 num_layers{};
41 u32 base_level{};
42 u32 num_levels{};
43};
44
45class ViewBase {
46public:
47 constexpr explicit ViewBase(const ViewParams& view_params) : params{view_params} {}
48
49 constexpr const ViewParams& GetViewParams() const {
50 return params;
51 }
52
53protected:
54 ViewParams params;
55};
56
57} // namespace VideoCommon
58
59namespace std {
60
61template <>
62struct hash<VideoCommon::ViewParams> {
63 std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept {
64 return k.Hash();
65 }
66};
67
68} // namespace std
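The std::hash specialization above is what lets ViewParams key the std::unordered_map<ViewParams, TView> member in surface_base.h; standard containers pick up program-defined specializations of std::hash automatically. The same wiring for a minimal custom key (the Key type is illustrative):

#include <cstddef>
#include <functional>
#include <unordered_map>

struct Key {
    unsigned value;
    bool operator==(const Key& rhs) const noexcept {
        return value == rhs.value;
    }
};

template <>
struct std::hash<Key> {
    std::size_t operator()(const Key& k) const noexcept {
        return std::hash<unsigned>{}(k.value);
    }
};

std::unordered_map<Key, int> example_map; // Well-formed thanks to the specialization above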
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 581d8dd5b..968059842 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -6,1298 +6,1449 @@
6 6
7#include <algorithm> 7#include <algorithm>
8#include <array> 8#include <array>
9#include <list> 9#include <bit>
10#include <memory> 10#include <memory>
11#include <mutex> 11#include <mutex>
12#include <set> 12#include <optional>
13#include <tuple> 13#include <span>
14#include <type_traits>
14#include <unordered_map> 15#include <unordered_map>
16#include <utility>
15#include <vector> 17#include <vector>
16 18
17#include <boost/container/small_vector.hpp> 19#include <boost/container/small_vector.hpp>
18#include <boost/icl/interval_map.hpp>
19#include <boost/range/iterator_range.hpp>
20 20
21#include "common/assert.h" 21#include "common/alignment.h"
22#include "common/common_funcs.h"
22#include "common/common_types.h" 23#include "common/common_types.h"
23#include "common/math_util.h" 24#include "common/logging/log.h"
24#include "core/core.h"
25#include "core/memory.h"
26#include "core/settings.h"
27#include "video_core/compatible_formats.h" 25#include "video_core/compatible_formats.h"
26#include "video_core/delayed_destruction_ring.h"
28#include "video_core/dirty_flags.h" 27#include "video_core/dirty_flags.h"
29#include "video_core/engines/fermi_2d.h" 28#include "video_core/engines/fermi_2d.h"
29#include "video_core/engines/kepler_compute.h"
30#include "video_core/engines/maxwell_3d.h" 30#include "video_core/engines/maxwell_3d.h"
31#include "video_core/gpu.h"
32#include "video_core/memory_manager.h" 31#include "video_core/memory_manager.h"
33#include "video_core/rasterizer_interface.h" 32#include "video_core/rasterizer_interface.h"
34#include "video_core/surface.h" 33#include "video_core/surface.h"
35#include "video_core/texture_cache/copy_params.h" 34#include "video_core/texture_cache/descriptor_table.h"
36#include "video_core/texture_cache/format_lookup_table.h" 35#include "video_core/texture_cache/format_lookup_table.h"
37#include "video_core/texture_cache/surface_base.h" 36#include "video_core/texture_cache/formatter.h"
38#include "video_core/texture_cache/surface_params.h" 37#include "video_core/texture_cache/image_base.h"
39#include "video_core/texture_cache/surface_view.h" 38#include "video_core/texture_cache/image_info.h"
40 39#include "video_core/texture_cache/image_view_base.h"
41namespace Tegra::Texture { 40#include "video_core/texture_cache/image_view_info.h"
42struct FullTextureInfo; 41#include "video_core/texture_cache/render_targets.h"
43} 42#include "video_core/texture_cache/samples_helper.h"
44 43#include "video_core/texture_cache/slot_vector.h"
45namespace VideoCore { 44#include "video_core/texture_cache/types.h"
46class RasterizerInterface; 45#include "video_core/texture_cache/util.h"
47} 46#include "video_core/textures/texture.h"
48 47
49namespace VideoCommon { 48namespace VideoCommon {
50 49
51using VideoCore::Surface::FormatCompatibility; 50using Tegra::Texture::SwizzleSource;
51using Tegra::Texture::TextureType;
52using Tegra::Texture::TICEntry;
53using Tegra::Texture::TSCEntry;
54using VideoCore::Surface::GetFormatType;
55using VideoCore::Surface::IsCopyCompatible;
52using VideoCore::Surface::PixelFormat; 56using VideoCore::Surface::PixelFormat;
53using VideoCore::Surface::SurfaceTarget; 57using VideoCore::Surface::PixelFormatFromDepthFormat;
54using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; 58using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
59using VideoCore::Surface::SurfaceType;
55 60
-template <typename TSurface, typename TView>
+template <class P>
 class TextureCache {
-    using VectorSurface = boost::container::small_vector<TSurface, 1>;
+    /// Address shift for caching images into a hash table
+    static constexpr u64 PAGE_SHIFT = 20;
+
+    /// Enables debugging features to the texture cache
+    static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
+    /// Implement blits as copies between framebuffers
+    static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
+    /// True when some copies have to be emulated
+    static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
+
+    /// Image view ID for null descriptors
+    static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
+    /// Sampler ID for bugged sampler ids
+    static constexpr SamplerId NULL_SAMPLER_ID{0};
+
+    using Runtime = typename P::Runtime;
+    using Image = typename P::Image;
+    using ImageAlloc = typename P::ImageAlloc;
+    using ImageView = typename P::ImageView;
+    using Sampler = typename P::Sampler;
+    using Framebuffer = typename P::Framebuffer;
+
+    struct BlitImages {
+        ImageId dst_id;
+        ImageId src_id;
+        PixelFormat dst_format;
+        PixelFormat src_format;
+    };
+
+    template <typename T>
+    struct IdentityHash {
+        [[nodiscard]] size_t operator()(T value) const noexcept {
+            return static_cast<size_t>(value);
+        }
+    };

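The IdentityHash above works together with PAGE_SHIFT: images are bucketed by 1 MiB page, and the hash-table keys are those page indices, which are already close to uniformly distributed, so a pass-through hash skips redundant mixing. A self-contained sketch of the pattern, with stand-in types for anything this diff does not show:

// Sketch only: a page-indexed table like the page_table member declared later in
// this header. RegisterPages is a hypothetical helper name.
#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

template <typename T>
struct IdentityHash {
    [[nodiscard]] std::size_t operator()(T value) const noexcept {
        return static_cast<std::size_t>(value); // keys are page indices, already dispersed
    }
};

constexpr std::uint64_t PAGE_SHIFT = 20; // 1 MiB pages
using ImageId = std::uint32_t;           // stand-in for the real strong id type

std::unordered_map<std::uint64_t, std::vector<ImageId>, IdentityHash<std::uint64_t>> page_table;

// Every page overlapped by [addr, addr + size) indexes the same image id
void RegisterPages(std::uint64_t addr, std::uint64_t size, ImageId id) {
    const std::uint64_t page_end = (addr + size - 1) >> PAGE_SHIFT;
    for (std::uint64_t page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
        page_table[page].push_back(id);
    }
}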
 public:
-    void InvalidateRegion(VAddr addr, std::size_t size) {
-        std::lock_guard lock{mutex};
+    explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
+                          Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);

-        for (const auto& surface : GetSurfacesInRegion(addr, size)) {
-            Unregister(surface);
-        }
-    }
+    /// Notify the cache that a new frame has been queued
+    void TickFrame();

-    void OnCPUWrite(VAddr addr, std::size_t size) {
-        std::lock_guard lock{mutex};
+    /// Return an unique mutually exclusive lock for the cache
+    [[nodiscard]] std::unique_lock<std::mutex> AcquireLock();

-        for (const auto& surface : GetSurfacesInRegion(addr, size)) {
-            if (surface->IsMemoryMarked()) {
-                UnmarkMemory(surface);
-                surface->SetSyncPending(true);
-                marked_for_unregister.emplace_back(surface);
-            }
-        }
-    }
+    /// Return a constant reference to the given image view id
+    [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;

-    void SyncGuestHost() {
-        std::lock_guard lock{mutex};
+    /// Return a reference to the given image view id
+    [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;

-        for (const auto& surface : marked_for_unregister) {
-            if (surface->IsRegistered()) {
-                surface->SetSyncPending(false);
-                Unregister(surface);
-            }
-        }
-        marked_for_unregister.clear();
-    }
+    /// Fill image_view_ids with the graphics images in indices
+    void FillGraphicsImageViews(std::span<const u32> indices,
+                                std::span<ImageViewId> image_view_ids);

-    /**
-     * Guarantees that rendertargets don't unregister themselves if the
-     * collide. Protection is currently only done on 3D slices.
-     */
-    void GuardRenderTargets(bool new_guard) {
-        guard_render_targets = new_guard;
-    }
+    /// Fill image_view_ids with the compute images in indices
+    void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids);

-    void GuardSamplers(bool new_guard) {
-        guard_samplers = new_guard;
-    }
+    /// Get the sampler from the graphics descriptor table in the specified index
+    Sampler* GetGraphicsSampler(u32 index);

-    void FlushRegion(VAddr addr, std::size_t size) {
-        std::lock_guard lock{mutex};
+    /// Get the sampler from the compute descriptor table in the specified index
+    Sampler* GetComputeSampler(u32 index);

-        auto surfaces = GetSurfacesInRegion(addr, size);
-        if (surfaces.empty()) {
-            return;
-        }
-        std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) {
-            return a->GetModificationTick() < b->GetModificationTick();
-        });
-        for (const auto& surface : surfaces) {
-            mutex.unlock();
-            FlushSurface(surface);
-            mutex.lock();
-        }
-    }
+    /// Refresh the state for graphics image view and sampler descriptors
+    void SynchronizeGraphicsDescriptors();

-    bool MustFlushRegion(VAddr addr, std::size_t size) {
-        std::lock_guard lock{mutex};
-
-        const auto surfaces = GetSurfacesInRegion(addr, size);
-        return std::any_of(surfaces.cbegin(), surfaces.cend(),
-                           [](const TSurface& surface) { return surface->IsModified(); });
-    }
+    /// Refresh the state for compute image view and sampler descriptors
+    void SynchronizeComputeDescriptors();

-    TView GetTextureSurface(const Tegra::Texture::TICEntry& tic,
-                            const VideoCommon::Shader::Sampler& entry) {
-        std::lock_guard lock{mutex};
-        const auto gpu_addr{tic.Address()};
-        if (!gpu_addr) {
-            return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
-        }
-
-        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
-        if (!cpu_addr) {
-            return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
-        }
-
-        if (!IsTypeCompatible(tic.texture_type, entry)) {
-            return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
-        }
-
-        const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
-        const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
-        if (guard_samplers) {
-            sampled_textures.push_back(surface);
-        }
-        return view;
-    }
+    /// Update bound render targets and upload memory if necessary
+    /// @param is_clear True when the render targets are being used for clears
+    void UpdateRenderTargets(bool is_clear);
+
+    /// Find a framebuffer with the currently bound render targets
+    /// UpdateRenderTargets should be called before this
+    Framebuffer* GetFramebuffer();

-    TView GetImageSurface(const Tegra::Texture::TICEntry& tic,
-                          const VideoCommon::Shader::Image& entry) {
-        std::lock_guard lock{mutex};
-        const auto gpu_addr{tic.Address()};
-        if (!gpu_addr) {
-            return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
-        }
-        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
-        if (!cpu_addr) {
-            return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
-        }
-        const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
-        const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
-        if (guard_samplers) {
-            sampled_textures.push_back(surface);
-        }
-        return view;
-    }
+    /// Mark images in a range as modified from the CPU
+    void WriteMemory(VAddr cpu_addr, size_t size);
+
+    /// Download contents of host images to guest memory in a region
+    void DownloadMemory(VAddr cpu_addr, size_t size);
+
+    /// Remove images in a region
+    void UnmapMemory(VAddr cpu_addr, size_t size);

-    bool TextureBarrier() {
-        const bool any_rt =
-            std::any_of(sampled_textures.begin(), sampled_textures.end(),
-                        [](const auto& surface) { return surface->IsRenderTarget(); });
-        sampled_textures.clear();
-        return any_rt;
-    }
+    /// Blit an image with the given parameters
+    void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
+                   const Tegra::Engines::Fermi2D::Surface& src,
+                   const Tegra::Engines::Fermi2D::Config& copy);

-    TView GetDepthBufferSurface(bool preserve_contents) {
-        std::lock_guard lock{mutex};
-        auto& dirty = maxwell3d.dirty;
-        if (!dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
-            return depth_buffer.view;
-        }
-        dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false;
-
-        const auto& regs{maxwell3d.regs};
-        const auto gpu_addr{regs.zeta.Address()};
-        if (!gpu_addr || !regs.zeta_enable) {
-            SetEmptyDepthBuffer();
-            return {};
-        }
-        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
-        if (!cpu_addr) {
-            SetEmptyDepthBuffer();
-            return {};
-        }
-        const auto depth_params{SurfaceParams::CreateForDepthBuffer(maxwell3d)};
-        auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
-        if (depth_buffer.target)
-            depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
-        depth_buffer.target = surface_view.first;
-        depth_buffer.view = surface_view.second;
-        if (depth_buffer.target)
-            depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT);
-        return surface_view.second;
-    }
-
-    TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
-        std::lock_guard lock{mutex};
-        ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
-        if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) {
-            return render_targets[index].view;
-        }
-        maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = false;
-
-        const auto& regs{maxwell3d.regs};
-        if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
-            regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
-            SetEmptyColorBuffer(index);
-            return {};
-        }
-
-        const auto& config{regs.rt[index]};
-        const auto gpu_addr{config.Address()};
-        if (!gpu_addr) {
-            SetEmptyColorBuffer(index);
-            return {};
-        }
-
-        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
-        if (!cpu_addr) {
-            SetEmptyColorBuffer(index);
-            return {};
-        }
-
-        auto surface_view =
-            GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(maxwell3d, index),
-                       preserve_contents, true);
-        if (render_targets[index].target) {
-            auto& surface = render_targets[index].target;
-            surface->MarkAsRenderTarget(false, NO_RT);
-            const auto& cr_params = surface->GetSurfaceParams();
-            if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
-                AsyncFlushSurface(surface);
-            }
-        }
-        render_targets[index].target = surface_view.first;
-        render_targets[index].view = surface_view.second;
-        if (render_targets[index].target)
-            render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index));
-        return surface_view.second;
-    }
+    /// Invalidate the contents of the color buffer index
+    /// These contents become unspecified, the cache can assume aggressive optimizations.
+    void InvalidateColorBuffer(size_t index);
+
+    /// Invalidate the contents of the depth buffer
+    /// These contents become unspecified, the cache can assume aggressive optimizations.
+    void InvalidateDepthBuffer();
+
+    /// Try to find a cached image view in the given CPU address
+    [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr);
+
+    /// Return true when there are uncommitted images to be downloaded
+    [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
+
+    /// Return true when the caller should wait for async downloads
+    [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
+
+    /// Commit asynchronous downloads
+    void CommitAsyncFlushes();
+
+    /// Pop asynchronous downloads
+    void PopAsyncFlushes();
+
+    /// Return true when a CPU region is modified from the GPU
+    [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
+
+private:
+    /// Iterate over all page indices in a range
+    template <typename Func>
+    static void ForEachPage(VAddr addr, size_t size, Func&& func) {
+        static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
+        const u64 page_end = (addr + size - 1) >> PAGE_SHIFT;
+        for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
+            if constexpr (RETURNS_BOOL) {
+                if (func(page)) {
+                    break;
+                }
+            } else {
+                func(page);
+            }
+        }
+    }

-    void MarkColorBufferInUse(std::size_t index) {
-        if (auto& render_target = render_targets[index].target) {
-            render_target->MarkAsModified(true, Tick());
-        }
-    }
+    /// Fills image_view_ids in the image views in indices
+    void FillImageViews(DescriptorTable<TICEntry>& table,
+                        std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
+                        std::span<ImageViewId> image_view_ids);

-    void MarkDepthBufferInUse() {
-        if (depth_buffer.target) {
-            depth_buffer.target->MarkAsModified(true, Tick());
-        }
-    }
+    /// Find or create an image view in the guest descriptor table
+    ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
+                               std::span<ImageViewId> cached_image_view_ids, u32 index);

-    void SetEmptyDepthBuffer() {
-        if (depth_buffer.target == nullptr) {
-            return;
-        }
-        depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
-        depth_buffer.target = nullptr;
-        depth_buffer.view = nullptr;
-    }
+    /// Find or create a framebuffer with the given render target parameters
+    FramebufferId GetFramebufferId(const RenderTargets& key);

-    void SetEmptyColorBuffer(std::size_t index) {
-        if (render_targets[index].target == nullptr) {
-            return;
-        }
-        render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
-        render_targets[index].target = nullptr;
-        render_targets[index].view = nullptr;
-    }
-
-    void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
-                     const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
-                     const Tegra::Engines::Fermi2D::Config& copy_config) {
-        std::lock_guard lock{mutex};
-        SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
-        SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
-        const GPUVAddr src_gpu_addr = src_config.Address();
-        const GPUVAddr dst_gpu_addr = dst_config.Address();
-        DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
-
-        const std::optional<VAddr> dst_cpu_addr = gpu_memory.GpuToCpuAddress(dst_gpu_addr);
-        const std::optional<VAddr> src_cpu_addr = gpu_memory.GpuToCpuAddress(src_gpu_addr);
-        std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
-        TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second;
-        ImageBlit(src_surface, dst_surface.second, copy_config);
-        dst_surface.first->MarkAsModified(true, Tick());
-    }
-
-    TSurface TryFindFramebufferSurface(VAddr addr) const {
-        if (!addr) {
-            return nullptr;
-        }
-        const VAddr page = addr >> registry_page_bits;
-        const auto it = registry.find(page);
-        if (it == registry.end()) {
-            return nullptr;
-        }
-        const auto& list = it->second;
-        const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) {
-            return surface->GetCpuAddr() == addr;
-        });
-        return found != list.end() ? *found : nullptr;
-    }
+    /// Refresh the contents (pixel data) of an image
+    void RefreshContents(Image& image);

-    u64 Tick() {
-        return ++ticks;
-    }
+    /// Upload data from guest to an image
+    template <typename MapBuffer>
+    void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset);

-    void CommitAsyncFlushes() {
-        committed_flushes.push_back(uncommitted_flushes);
-        uncommitted_flushes.reset();
-    }
+    /// Find or create an image view from a guest descriptor
+    [[nodiscard]] ImageViewId FindImageView(const TICEntry& config);

-    bool HasUncommittedFlushes() const {
-        return uncommitted_flushes != nullptr;
-    }
+    /// Create a new image view from a guest descriptor
+    [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config);

-    bool ShouldWaitAsyncFlushes() const {
-        return !committed_flushes.empty() && committed_flushes.front() != nullptr;
-    }
+    /// Find or create an image from the given parameters
+    [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
+                                            RelaxedOptions options = RelaxedOptions{});

-    void PopAsyncFlushes() {
-        if (committed_flushes.empty()) {
-            return;
-        }
-        auto& flush_list = committed_flushes.front();
-        if (!flush_list) {
-            committed_flushes.pop_front();
-            return;
-        }
-        for (TSurface& surface : *flush_list) {
-            FlushSurface(surface);
-        }
-        committed_flushes.pop_front();
-    }
+    /// Find an image from the given parameters
+    [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
+                                    RelaxedOptions options);

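Compared to the removed inline bodies on the left-hand side, the new interface is declaration-only and expects callers to hold the cache lock explicitly. A hypothetical rasterizer-side sequence, using only methods declared above and assuming this header is included:

// Sketch: expected per-draw usage of the new public interface (the surrounding
// rasterizer code and function name are hypothetical).
template <class P>
void SetupDraw(VideoCommon::TextureCache<P>& texture_cache, std::span<const u32> tic_indices,
               std::span<VideoCommon::ImageViewId> out_views) {
    const auto lock = texture_cache.AcquireLock();       // one coarse lock per operation
    texture_cache.SynchronizeGraphicsDescriptors();      // refresh TIC/TSC tables
    texture_cache.FillGraphicsImageViews(tic_indices, out_views);
    texture_cache.UpdateRenderTargets(false);            // bind color and zeta buffers
    auto* const framebuffer = texture_cache.GetFramebuffer();
    // ... record backend state with framebuffer and out_views, then draw ...
}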
-protected:
-    explicit TextureCache(VideoCore::RasterizerInterface& rasterizer_,
-                          Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
-                          bool is_astc_supported_)
-        : is_astc_supported{is_astc_supported_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
-          gpu_memory{gpu_memory_} {
-        for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
-            SetEmptyColorBuffer(i);
-        }
+    /// Create an image from the given parameters
+    [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
+                                      RelaxedOptions options);

-        SetEmptyDepthBuffer();
-        staging_cache.SetSize(2);
+    /// Create a new image and join perfectly matching existing images
+    /// Remove joined images from the cache
+    [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);

-        const auto make_siblings = [this](PixelFormat a, PixelFormat b) {
-            siblings_table[static_cast<std::size_t>(a)] = b;
-            siblings_table[static_cast<std::size_t>(b)] = a;
-        };
-        std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid);
-        make_siblings(PixelFormat::D16_UNORM, PixelFormat::R16_UNORM);
-        make_siblings(PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT);
-        make_siblings(PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::R32G32_FLOAT);
+    /// Return a blit image pair from the given guest blit parameters
+    [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
+                                           const Tegra::Engines::Fermi2D::Surface& src);

-        sampled_textures.reserve(64);
-    }
+    /// Find or create a sampler from a guest descriptor sampler
+    [[nodiscard]] SamplerId FindSampler(const TSCEntry& config);

-    ~TextureCache() = default;
+    /// Find or create an image view for the given color buffer index
+    [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear);

-    virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0;
+    /// Find or create an image view for the depth buffer
+    [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear);

-    virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface,
-                           const CopyParams& copy_params) = 0;
+    /// Find or create a view for a render target with the given image parameters
+    [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
+                                                   bool is_clear);

-    virtual void ImageBlit(TView& src_view, TView& dst_view,
-                           const Tegra::Engines::Fermi2D::Config& copy_config) = 0;
+    /// Iterates over all the images in a region calling func
+    template <typename Func>
+    void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);

-    // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture
-    // and reading it from a separate buffer.
-    virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
+    /// Find or create an image view in the given image with the passed parameters
+    [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);

-    void ManageRenderTargetUnregister(TSurface& surface) {
-        auto& dirty = maxwell3d.dirty;
-        const u32 index = surface->GetRenderTarget();
-        if (index == DEPTH_RT) {
-            dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true;
-        } else {
-            dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = true;
-        }
-        dirty.flags[VideoCommon::Dirty::RenderTargets] = true;
-    }
+    /// Register image in the page table
+    void RegisterImage(ImageId image);
+
+    /// Unregister image from the page table
+    void UnregisterImage(ImageId image);
+
+    /// Track CPU reads and writes for image
+    void TrackImage(ImageBase& image);
+
+    /// Stop tracking CPU reads and writes for image
+    void UntrackImage(ImageBase& image);
+
+    /// Delete image from the cache
+    void DeleteImage(ImageId image);
+
+    /// Remove image views references from the cache
+    void RemoveImageViewReferences(std::span<const ImageViewId> removed_views);
+
+    /// Remove framebuffers using the given image views from the cache
+    void RemoveFramebuffers(std::span<const ImageViewId> removed_views);
+
+    /// Mark an image as modified from the GPU
+    void MarkModification(ImageBase& image) noexcept;
+
+    /// Synchronize image aliases, copying data if needed
+    void SynchronizeAliases(ImageId image_id);
+
+    /// Prepare an image to be used
+    void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
+
+    /// Prepare an image view to be used
+    void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
+
+    /// Execute copies from one image to the other, even if they are incompatible
+    void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies);
+
+    /// Bind an image view as render target, downloading resources preemtively if needed
+    void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
+
+    /// Create a render target from a given image and image view parameters
+    [[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage(
+        ImageId, const ImageViewInfo& view_info);
+
+    /// Returns true if the current clear parameters clear the whole image of a given image view
+    [[nodiscard]] bool IsFullClear(ImageViewId id);
+
+    Runtime& runtime;
+    VideoCore::RasterizerInterface& rasterizer;
+    Tegra::Engines::Maxwell3D& maxwell3d;
+    Tegra::Engines::KeplerCompute& kepler_compute;
+    Tegra::MemoryManager& gpu_memory;
+
+    DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
+    DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
+    std::vector<SamplerId> graphics_sampler_ids;
+    std::vector<ImageViewId> graphics_image_view_ids;
+
+    DescriptorTable<TICEntry> compute_image_table{gpu_memory};
+    DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
+    std::vector<SamplerId> compute_sampler_ids;
+    std::vector<ImageViewId> compute_image_view_ids;
+
+    RenderTargets render_targets;
+
+    std::mutex mutex;
+
+    std::unordered_map<TICEntry, ImageViewId> image_views;
+    std::unordered_map<TSCEntry, SamplerId> samplers;
+    std::unordered_map<RenderTargets, FramebufferId> framebuffers;
+
+    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
+
+    bool has_deleted_images = false;
+
+    SlotVector<Image> slot_images;
+    SlotVector<ImageView> slot_image_views;
+    SlotVector<ImageAlloc> slot_image_allocs;
+    SlotVector<Sampler> slot_samplers;
+    SlotVector<Framebuffer> slot_framebuffers;
+
+    // TODO: This data structure is not optimal and it should be reworked
+    std::vector<ImageId> uncommitted_downloads;
+    std::queue<std::vector<ImageId>> committed_downloads;
+
+    static constexpr size_t TICKS_TO_DESTROY = 6;
+    DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
+    DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
+    DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers;
+
+    std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table;
+
+    u64 modification_tick = 0;
+    u64 frame_tick = 0;
+};

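The members above replace shared pointers with SlotVector containers plus small strong ids (ImageId, ImageViewId, ...). A toy reduction of the contract the call sites rely on; the real video_core/texture_cache/slot_vector.h is more involved (it reuses freed slots):

// Sketch only: insert() returns a stable id, operator[] resolves it.
#include <cstddef>
#include <utility>
#include <vector>

template <typename T>
class SlotVectorSketch {
public:
    using Id = std::size_t;

    template <typename... Args>
    Id insert(Args&&... args) {
        values.emplace_back(std::forward<Args>(args)...);
        return values.size() - 1; // the real container recycles freed slots
    }

    T& operator[](Id id) {
        return values[id];
    }

private:
    std::vector<T> values;
};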
+template <class P>
+TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
+                              Tegra::Engines::Maxwell3D& maxwell3d_,
+                              Tegra::Engines::KeplerCompute& kepler_compute_,
+                              Tegra::MemoryManager& gpu_memory_)
+    : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
+      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
+    // Configure null sampler
+    TSCEntry sampler_descriptor{};
+    sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
+    sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear);
+    sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear);
+    sampler_descriptor.cubemap_anisotropy.Assign(1);
+
+    // Make sure the first index is reserved for the null resources
+    // This way the null resource becomes a compile time constant
+    void(slot_image_views.insert(runtime, NullImageParams{}));
+    void(slot_samplers.insert(runtime, sampler_descriptor));
+}
+
+template <class P>
+void TextureCache<P>::TickFrame() {
+    // Tick sentenced resources in this order to ensure they are destroyed in the right order
+    sentenced_images.Tick();
+    sentenced_framebuffers.Tick();
+    sentenced_image_view.Tick();
+    ++frame_tick;
+}
+
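TickFrame retires resources through the DelayedDestructionRing members, keeping dead objects alive for TICKS_TO_DESTROY frames so in-flight GPU work can complete before the host object is released. A toy reduction of the idea; the real container lives in video_core/delayed_destruction_ring.h:

// Sketch only: a ring of buckets, one per frame in flight.
#include <array>
#include <cstddef>
#include <utility>
#include <vector>

template <typename T, std::size_t TICKS>
class DelayedDestructionRingSketch {
public:
    void Push(T&& object) {
        buckets[index].push_back(std::move(object));
    }
    void Tick() {
        index = (index + 1) % TICKS;
        buckets[index].clear(); // destroys objects pushed TICKS frames ago
    }

private:
    std::array<std::vector<T>, TICKS> buckets{};
    std::size_t index = 0;
};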
+template <class P>
+std::unique_lock<std::mutex> TextureCache<P>::AcquireLock() {
+    return std::unique_lock{mutex};
+}
+
+template <class P>
+const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept {
+    return slot_image_views[id];
+}
+
+template <class P>
+typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
+    return slot_image_views[id];
+}
+
+template <class P>
+void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices,
+                                             std::span<ImageViewId> image_view_ids) {
+    FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids);
+}
+
+template <class P>
+void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices,
+                                            std::span<ImageViewId> image_view_ids) {
+    FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids);
+}
+
+template <class P>
+typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
+    [[unlikely]] if (index > graphics_sampler_table.Limit()) {
+        LOG_ERROR(HW_GPU, "Invalid sampler index={}", index);
+        return &slot_samplers[NULL_SAMPLER_ID];
+    }
+    const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
+    SamplerId& id = graphics_sampler_ids[index];
+    [[unlikely]] if (is_new) {
+        id = FindSampler(descriptor);
+    }
+    return &slot_samplers[id];
+}

-    void Register(TSurface surface) {
-        const GPUVAddr gpu_addr = surface->GetGpuAddr();
-        const std::size_t size = surface->GetSizeInBytes();
-        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
-        if (!cpu_addr) {
-            LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
-                         gpu_addr);
-            return;
-        }
-        surface->SetCpuAddr(*cpu_addr);
-        RegisterInnerCache(surface);
-        surface->MarkAsRegistered(true);
-        surface->SetMemoryMarked(true);
-        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
-    }
+template <class P>
+typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
+    [[unlikely]] if (index > compute_sampler_table.Limit()) {
+        LOG_ERROR(HW_GPU, "Invalid sampler index={}", index);
+        return &slot_samplers[NULL_SAMPLER_ID];
+    }
+    const auto [descriptor, is_new] = compute_sampler_table.Read(index);
+    SamplerId& id = compute_sampler_ids[index];
+    [[unlikely]] if (is_new) {
+        id = FindSampler(descriptor);
+    }
+    return &slot_samplers[id];
+}

-    void UnmarkMemory(TSurface surface) {
-        if (!surface->IsMemoryMarked()) {
-            return;
-        }
-        const std::size_t size = surface->GetSizeInBytes();
-        const VAddr cpu_addr = surface->GetCpuAddr();
-        rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
-        surface->SetMemoryMarked(false);
-    }
+template <class P>
+void TextureCache<P>::SynchronizeGraphicsDescriptors() {
+    using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex;
+    const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex;
+    const u32 tic_limit = maxwell3d.regs.tic.limit;
+    const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit;
+    if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) {
+        graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+    }
+    if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) {
+        graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+    }
+}

-    void Unregister(TSurface surface) {
-        if (guard_render_targets && surface->IsProtected()) {
-            return;
-        }
-        if (!guard_render_targets && surface->IsRenderTarget()) {
-            ManageRenderTargetUnregister(surface);
-        }
-        UnmarkMemory(surface);
-        if (surface->IsSyncPending()) {
-            marked_for_unregister.remove(surface);
-            surface->SetSyncPending(false);
-        }
-        UnregisterInnerCache(surface);
-        surface->MarkAsRegistered(false);
-        ReserveSurface(surface->GetSurfaceParams(), surface);
-    }
+template <class P>
+void TextureCache<P>::SynchronizeComputeDescriptors() {
+    const bool linked_tsc = kepler_compute.launch_description.linked_tsc;
+    const u32 tic_limit = kepler_compute.regs.tic.limit;
+    const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit;
+    const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address();
+    if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
+        compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+    }
+    if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) {
+        compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+    }
+}

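The sampler getters and the Synchornize calls above imply a small contract for DescriptorTable: Read(index) re-fetches one guest descriptor and reports whether it changed, and Synchornize(addr, limit) reports whether the whole table moved or grew, in which case the cached id arrays are reset to CORRUPT_ID. A reduced, illustrative Read under those assumptions; the real table lives in video_core/texture_cache/descriptor_table.h:

// Sketch only: re-read guest memory and report "is_new" so callers rebuild host
// objects only for descriptors that actually changed. Types are stand-ins.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <utility>
#include <vector>

struct TSC {
    std::uint32_t raw[8];
};

class SamplerTableSketch {
public:
    std::pair<TSC, bool> Read(std::size_t index, const std::uint8_t* guest) {
        TSC fresh{};
        std::memcpy(&fresh, guest + index * sizeof(TSC), sizeof(TSC));
        if (index >= cached.size()) {
            cached.resize(index + 1);
        }
        const bool is_new = std::memcmp(&fresh, &cached[index], sizeof(TSC)) != 0;
        cached[index] = fresh;
        return {fresh, is_new};
    }

private:
    std::vector<TSC> cached;
};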
-    TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
-        if (const auto surface = TryGetReservedSurface(params); surface) {
-            surface->SetGpuAddr(gpu_addr);
-            return surface;
-        }
-        // No reserved surface available, create a new one and reserve it
-        auto new_surface{CreateSurface(gpu_addr, params)};
-        return new_surface;
-    }
-
-    const bool is_astc_supported;
-
-private:
-    enum class RecycleStrategy : u32 {
-        Ignore = 0,
-        Flush = 1,
-        BufferCopy = 3,
-    };
-
-    enum class DeductionType : u32 {
-        DeductionComplete,
-        DeductionIncomplete,
-        DeductionFailed,
-    };
+template <class P>
+void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
+    using namespace VideoCommon::Dirty;
+    auto& flags = maxwell3d.dirty.flags;
+    if (!flags[Dirty::RenderTargets]) {
+        return;
+    }
+    flags[Dirty::RenderTargets] = false;
+
+    // Render target control is used on all render targets, so force look ups when this one is up
+    const bool force = flags[Dirty::RenderTargetControl];
+    flags[Dirty::RenderTargetControl] = false;
+
+    for (size_t index = 0; index < NUM_RT; ++index) {
+        ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
+        if (flags[Dirty::ColorBuffer0 + index] || force) {
+            flags[Dirty::ColorBuffer0 + index] = false;
+            BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
+        }
+        PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id));
+    }
+    if (flags[Dirty::ZetaBuffer] || force) {
+        flags[Dirty::ZetaBuffer] = false;
+        BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
+    }
+    const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
+    PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
+
+    for (size_t index = 0; index < NUM_RT; ++index) {
+        render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
+    }
+    render_targets.size = Extent2D{
+        maxwell3d.regs.render_area.width,
+        maxwell3d.regs.render_area.height,
+    };
+}

-    struct Deduction {
-        DeductionType type{DeductionType::DeductionFailed};
-        TSurface surface{};
-
-        bool Failed() const {
-            return type == DeductionType::DeductionFailed;
-        }
-
-        bool Incomplete() const {
-            return type == DeductionType::DeductionIncomplete;
-        }
-
-        bool IsDepth() const {
-            return surface->GetSurfaceParams().IsPixelFormatZeta();
-        }
-    };
+template <class P>
+typename P::Framebuffer* TextureCache<P>::GetFramebuffer() {
+    return &slot_framebuffers[GetFramebufferId(render_targets)];
+}
+
+template <class P>
+void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
+                                     std::span<ImageViewId> cached_image_view_ids,
+                                     std::span<const u32> indices,
+                                     std::span<ImageViewId> image_view_ids) {
+    ASSERT(indices.size() <= image_view_ids.size());
+    do {
+        has_deleted_images = false;
+        std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) {
+            return VisitImageView(table, cached_image_view_ids, index);
+        });
+    } while (has_deleted_images);
+}
+
+template <class P>
+ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table,
+                                            std::span<ImageViewId> cached_image_view_ids,
+                                            u32 index) {
+    if (index > table.Limit()) {
+        LOG_ERROR(HW_GPU, "Invalid image view index={}", index);
+        return NULL_IMAGE_VIEW_ID;
+    }
+    const auto [descriptor, is_new] = table.Read(index);
+    ImageViewId& image_view_id = cached_image_view_ids[index];
+    if (is_new) {
+        image_view_id = FindImageView(descriptor);
+    }
+    if (image_view_id != NULL_IMAGE_VIEW_ID) {
+        PrepareImageView(image_view_id, false, false);
+    }
+    return image_view_id;
+}
+
+template <class P>
+FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) {
+    const auto [pair, is_new] = framebuffers.try_emplace(key);
+    FramebufferId& framebuffer_id = pair->second;
+    if (!is_new) {
+        return framebuffer_id;
+    }
+    std::array<ImageView*, NUM_RT> color_buffers;
+    std::ranges::transform(key.color_buffer_ids, color_buffers.begin(),
+                           [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; });
+    ImageView* const depth_buffer =
+        key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr;
+    framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key);
+    return framebuffer_id;
+}

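FillImageViews re-runs its whole pass while has_deleted_images is set: VisitImageView can create an image, image creation can delete overlapping images, and a deletion may invalidate ids the pass already wrote. The generic shape of that retry, illustrative only:

// Sketch: repeat a pass until it completes without invalidating its own results,
// mirroring the has_deleted_images flag above.
template <typename Pass>
void RunUntilStable(Pass&& pass) {
    while (pass()) {
        // pass() returns true when it invalidated results it had already produced;
        // run again until a clean pass completes.
    }
}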
-    /**
-     * Takes care of selecting a proper strategy to deal with a texture recycle.
-     *
-     * @param overlaps The overlapping surfaces registered in the cache.
-     * @param params The parameters on the new surface.
-     * @param gpu_addr The starting address of the new surface.
-     * @param untopological Indicates to the recycler that the texture has no way
-     * to match the overlaps due to topological reasons.
-     **/
-    RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params,
-                                 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
-        if (Settings::IsGPULevelExtreme()) {
-            return RecycleStrategy::Flush;
-        }
-        // 3D Textures decision
-        if (params.target == SurfaceTarget::Texture3D) {
-            return RecycleStrategy::Flush;
-        }
-        for (const auto& s : overlaps) {
-            const auto& s_params = s->GetSurfaceParams();
-            if (s_params.target == SurfaceTarget::Texture3D) {
-                return RecycleStrategy::Flush;
-            }
-        }
-        // Untopological decision
-        if (untopological == MatchTopologyResult::CompressUnmatch) {
-            return RecycleStrategy::Flush;
-        }
-        if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) {
-            return RecycleStrategy::Flush;
-        }
-        return RecycleStrategy::Ignore;
-    }
-
-    /**
-     * Used to decide what to do with textures we can't resolve in the cache It has 2 implemented
-     * strategies: Ignore and Flush.
-     *
-     * - Ignore: Just unregisters all the overlaps and loads the new texture.
-     * - Flush: Flushes all the overlaps into memory and loads the new surface from that data.
-     *
-     * @param overlaps The overlapping surfaces registered in the cache.
-     * @param params The parameters for the new surface.
-     * @param gpu_addr The starting address of the new surface.
-     * @param preserve_contents Indicates that the new surface should be loaded from memory or left
-     * blank.
-     * @param untopological Indicates to the recycler that the texture has no way to match the
-     * overlaps due to topological reasons.
-     **/
-    std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params,
-                                              const GPUVAddr gpu_addr, const bool preserve_contents,
-                                              const MatchTopologyResult untopological) {
-        const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
-        for (auto& surface : overlaps) {
-            Unregister(surface);
-        }
-        switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
-        case RecycleStrategy::Ignore: {
-            return InitializeSurface(gpu_addr, params, do_load);
-        }
-        case RecycleStrategy::Flush: {
-            std::sort(overlaps.begin(), overlaps.end(),
-                      [](const TSurface& a, const TSurface& b) -> bool {
-                          return a->GetModificationTick() < b->GetModificationTick();
-                      });
-            for (auto& surface : overlaps) {
-                FlushSurface(surface);
-            }
-            return InitializeSurface(gpu_addr, params, preserve_contents);
-        }
-        case RecycleStrategy::BufferCopy: {
-            auto new_surface = GetUncachedSurface(gpu_addr, params);
-            BufferCopy(overlaps[0], new_surface);
-            return {new_surface, new_surface->GetMainView()};
-        }
-        default: {
-            UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
-            return InitializeSurface(gpu_addr, params, do_load);
-        }
-        }
-    }
+template <class P>
+void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
+    ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) {
+        if (True(image.flags & ImageFlagBits::CpuModified)) {
+            return;
+        }
+        image.flags |= ImageFlagBits::CpuModified;
+        UntrackImage(image);
+    });
+}
+
+template <class P>
+void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
+    std::vector<ImageId> images;
+    ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
+        // Skip images that were not modified from the GPU
+        if (False(image.flags & ImageFlagBits::GpuModified)) {
+            return;
+        }
+        // Skip images that .are. modified from the CPU
+        // We don't want to write sensitive data from the guest
+        if (True(image.flags & ImageFlagBits::CpuModified)) {
+            return;
+        }
+        if (image.info.num_samples > 1) {
+            LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
+            return;
+        }
+        image.flags &= ~ImageFlagBits::GpuModified;
+        images.push_back(image_id);
+    });
+    if (images.empty()) {
+        return;
+    }
+    std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) {
+        return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick;
+    });
+    for (const ImageId image_id : images) {
+        Image& image = slot_images[image_id];
+        auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes);
+        const auto copies = FullDownloadCopies(image.info);
+        image.DownloadMemory(map, 0, copies);
+        runtime.Finish();
+        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span());
+    }
+}

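WriteMemory and DownloadMemory above are the two halves of a dirty-state protocol: CPU writes stale the host copy, GPU writes stale guest memory. Assumed shape of the flags involved (ImageFlagBits itself is declared in video_core/texture_cache/image_base.h and True()/False() are small enum-bitmask helpers from yuzu's common code; the bit values here are illustrative):

// Sketch under the assumptions stated above.
enum class ImageFlagBits : unsigned {
    CpuModified = 1u << 0, // guest wrote backing memory; host copy is stale
    GpuModified = 1u << 1, // GPU wrote the image; guest memory is stale
    Tracked     = 1u << 2, // CPU read/write tracking is active
};
constexpr ImageFlagBits operator&(ImageFlagBits a, ImageFlagBits b) {
    return static_cast<ImageFlagBits>(static_cast<unsigned>(a) & static_cast<unsigned>(b));
}
constexpr bool True(ImageFlagBits flag) {
    return static_cast<unsigned>(flag) != 0;
}
constexpr bool False(ImageFlagBits flag) {
    return static_cast<unsigned>(flag) == 0;
}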
-    /**
-     * Takes a single surface and recreates into another that may differ in
-     * format, target or width alignment.
-     *
-     * @param current_surface The registered surface in the cache which we want to convert.
-     * @param params The new surface params which we'll use to recreate the surface.
-     * @param is_render Whether or not the surface is a render target.
-     **/
-    std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params,
-                                              bool is_render) {
-        const auto gpu_addr = current_surface->GetGpuAddr();
-        const auto& cr_params = current_surface->GetSurfaceParams();
-        TSurface new_surface;
-        if (cr_params.pixel_format != params.pixel_format && !is_render &&
-            GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) {
-            SurfaceParams new_params = params;
-            new_params.pixel_format = cr_params.pixel_format;
-            new_params.type = cr_params.type;
-            new_surface = GetUncachedSurface(gpu_addr, new_params);
-        } else {
-            new_surface = GetUncachedSurface(gpu_addr, params);
-        }
-        const SurfaceParams& final_params = new_surface->GetSurfaceParams();
-        if (cr_params.type != final_params.type) {
-            if (Settings::IsGPULevelExtreme()) {
-                BufferCopy(current_surface, new_surface);
-            }
-        } else {
-            std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
-            for (auto& brick : bricks) {
-                TryCopyImage(current_surface, new_surface, brick);
-            }
-        }
-        Unregister(current_surface);
-        Register(new_surface);
-        new_surface->MarkAsModified(current_surface->IsModified(), Tick());
-        return {new_surface, new_surface->GetMainView()};
-    }
+template <class P>
+void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
+    std::vector<ImageId> deleted_images;
+    ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
+    for (const ImageId id : deleted_images) {
+        Image& image = slot_images[id];
+        if (True(image.flags & ImageFlagBits::Tracked)) {
+            UntrackImage(image);
+        }
+        UnregisterImage(id);
+        DeleteImage(id);
+    }
+}

-    /**
-     * Takes a single surface and checks with the new surface's params if it's an exact
-     * match, we return the main view of the registered surface. If its formats don't
-     * match, we rebuild the surface. We call this last method a `Mirage`. If formats
-     * match but the targets don't, we create an overview View of the registered surface.
-     *
-     * @param current_surface The registered surface in the cache which we want to convert.
-     * @param params The new surface params which we want to check.
-     * @param is_render Whether or not the surface is a render target.
-     **/
-    std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface,
-                                                     const SurfaceParams& params, bool is_render) {
-        const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
-        const bool matches_target = current_surface->MatchTarget(params.target);
-        const auto match_check = [&]() -> std::pair<TSurface, TView> {
-            if (matches_target) {
-                return {current_surface, current_surface->GetMainView()};
-            }
-            return {current_surface, current_surface->EmplaceOverview(params)};
-        };
-        if (!is_mirage) {
-            return match_check();
-        }
-        if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) {
-            return match_check();
-        }
-        return RebuildSurface(current_surface, params, is_render);
-    }
-
-    /**
-     * Unlike RebuildSurface where we know whether or not registered surfaces match the candidate
-     * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps
-     * of the new surface, if they all match we end up recreating a surface for them,
-     * else we return nothing.
-     *
-     * @param overlaps The overlapping surfaces registered in the cache.
-     * @param params The parameters on the new surface.
-     * @param gpu_addr The starting address of the new surface.
-     **/
-    std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps,
-                                                                    const SurfaceParams& params,
-                                                                    GPUVAddr gpu_addr) {
-        if (params.target == SurfaceTarget::Texture3D) {
-            return std::nullopt;
-        }
-        const auto test_modified = [](TSurface& surface) { return surface->IsModified(); };
-        TSurface new_surface = GetUncachedSurface(gpu_addr, params);

-        if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) {
-            LoadSurface(new_surface);
-            for (const auto& surface : overlaps) {
-                Unregister(surface);
-            }
-            Register(new_surface);
-            return {{new_surface, new_surface->GetMainView()}};
-        }
+template <class P>
+void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
+                                const Tegra::Engines::Fermi2D::Surface& src,
+                                const Tegra::Engines::Fermi2D::Config& copy) {
+    const BlitImages images = GetBlitImages(dst, src);
+    const ImageId dst_id = images.dst_id;
+    const ImageId src_id = images.src_id;
+    PrepareImage(src_id, false, false);
+    PrepareImage(dst_id, true, false);
+
+    ImageBase& dst_image = slot_images[dst_id];
+    const ImageBase& src_image = slot_images[src_id];
+
+    // TODO: Deduplicate
+    const std::optional dst_base = dst_image.TryFindBase(dst.Address());
+    const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
+    const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
+    const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
+    const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
+    const std::array src_region{
+        Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y},
+        Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
+    };

-        std::size_t passed_tests = 0;
-        for (auto& surface : overlaps) {
-            const SurfaceParams& src_params = surface->GetSurfaceParams();
-            const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
-            if (!mipmap_layer) {
-                continue;
-            }
-            const auto [base_layer, base_mipmap] = *mipmap_layer;
-            if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) {
-                continue;
-            }
-            ++passed_tests;
-
-            // Copy all mipmaps and layers
-            const u32 block_width = params.GetDefaultBlockWidth();
-            const u32 block_height = params.GetDefaultBlockHeight();
-            for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) {
-                const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap);
-                const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap);
-                if (width < block_width || height < block_height) {
-                    // Current APIs forbid copying small compressed textures, avoid errors
-                    break;
-                }
-                const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height,
-                                             src_params.depth);
-                TryCopyImage(surface, new_surface, copy_params);
-            }
-        }
-        if (passed_tests == 0) {
-            return std::nullopt;
-        }
-        if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) {
-            // In Accurate GPU all tests should pass, else we recycle
-            return std::nullopt;
-        }
+    const std::optional src_base = src_image.TryFindBase(src.Address());
+    const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}};
+    const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
+    const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
+    const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
+    const std::array dst_region{
+        Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y},
+        Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y},
+    };

-        const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified);
-        for (const auto& surface : overlaps) {
-            Unregister(surface);
-        }
+    // Always call this after src_framebuffer_id was queried, as the address might be invalidated.
+    Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
+    if constexpr (FRAMEBUFFER_BLITS) {
+        // OpenGL blits from framebuffers, not images
+        Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id];
+        runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region,
+                                copy.filter, copy.operation);
+    } else {
+        // Vulkan can blit images, but it lacks format reinterpretations
+        // Provide a framebuffer in case it's necessary
+        ImageView& dst_view = slot_image_views[dst_view_id];
+        ImageView& src_view = slot_image_views[src_view_id];
+        runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,
+                          copy.operation);
+    }
+}

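The if constexpr (FRAMEBUFFER_BLITS) split above is resolved at compile time from the policy type P that each renderer passes in. Hypothetical policy traits to illustrate the mechanism; the real traits are defined by each backend's texture cache header, and the values and fields here are assumptions:

// Sketch only: two backend policies selecting different blit paths.
struct OpenGLPolicySketch {
    static constexpr bool ENABLE_VALIDATION = true;
    static constexpr bool FRAMEBUFFER_BLITS = true;  // glBlitFramebuffer-style path
    static constexpr bool HAS_EMULATED_COPIES = true;
    // using Runtime = ...; using Image = ...; using ImageView = ...; etc.
};

struct VulkanPolicySketch {
    static constexpr bool ENABLE_VALIDATION = true;
    static constexpr bool FRAMEBUFFER_BLITS = false; // image-to-image blit path
    static constexpr bool HAS_EMULATED_COPIES = false;
    // using Runtime = ...; using Image = ...; using ImageView = ...; etc.
};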
-        new_surface->MarkAsModified(modified, Tick());
-        Register(new_surface);
-        return {{new_surface, new_surface->GetMainView()}};
-    }
-
-    /**
-     * Takes care of managing 3D textures and its slices. Does HLE methods for reconstructing the 3D
-     * textures within the GPU if possible. Falls back to LLE when it isn't possible to use any of
-     * the HLE methods.
-     *
-     * @param overlaps The overlapping surfaces registered in the cache.
-     * @param params The parameters on the new surface.
-     * @param gpu_addr The starting address of the new surface.
-     * @param cpu_addr The starting address of the new surface on physical memory.
-     * @param preserve_contents Indicates that the new surface should be loaded from memory or
-     * left blank.
-     */
-    std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
-                                                               const SurfaceParams& params,
-                                                               GPUVAddr gpu_addr, VAddr cpu_addr,
-                                                               bool preserve_contents) {
-        if (params.target != SurfaceTarget::Texture3D) {
-            for (const auto& surface : overlaps) {
-                if (!surface->MatchTarget(params.target)) {
-                    if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
-                        if (Settings::IsGPULevelExtreme()) {
-                            return std::nullopt;
-                        }
-                        Unregister(surface);
-                        return InitializeSurface(gpu_addr, params, preserve_contents);
-                    }
-                    return std::nullopt;
-                }
-                if (surface->GetCpuAddr() != cpu_addr) {
-                    continue;
-                }
-                if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
-                    return std::make_pair(surface, surface->GetMainView());
-                }
-            }
-            return InitializeSurface(gpu_addr, params, preserve_contents);
-        }
+template <class P>
+void TextureCache<P>::InvalidateColorBuffer(size_t index) {
+    ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
+    color_buffer_id = FindColorBuffer(index, false);
+    if (!color_buffer_id) {
+        LOG_ERROR(HW_GPU, "Invalidating invalid color buffer in index={}", index);
+        return;
+    }
+    // When invalidating a color buffer, the old contents are no longer relevant
+    ImageView& color_buffer = slot_image_views[color_buffer_id];
+    Image& image = slot_images[color_buffer.image_id];
+    image.flags &= ~ImageFlagBits::CpuModified;
+    image.flags &= ~ImageFlagBits::GpuModified;

-        if (params.num_levels > 1) {
-            // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
-            return std::nullopt;
-        }
+    runtime.InvalidateColorBuffer(color_buffer, index);
+}

-        if (overlaps.size() == 1) {
-            const auto& surface = overlaps[0];
-            const SurfaceParams& overlap_params = surface->GetSurfaceParams();
-            // Don't attempt to render to textures with more than one level for now
-            // The texture has to be to the right or the sample address if we want to render to it
-            if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) {
-                const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr());
-                const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
-                if (slice < overlap_params.depth) {
-                    auto view = surface->Emplace3DView(slice, params.depth, 0, 1);
-                    return std::make_pair(std::move(surface), std::move(view));
-                }
-            }
-        }
+template <class P>
+void TextureCache<P>::InvalidateDepthBuffer() {
+    ImageViewId& depth_buffer_id = render_targets.depth_buffer_id;
+    depth_buffer_id = FindDepthBuffer(false);
+    if (!depth_buffer_id) {
+        LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer");
+        return;
+    }
+    // When invalidating the depth buffer, the old contents are no longer relevant
+    ImageBase& image = slot_images[slot_image_views[depth_buffer_id].image_id];
+    image.flags &= ~ImageFlagBits::CpuModified;
+    image.flags &= ~ImageFlagBits::GpuModified;

-        TSurface new_surface = GetUncachedSurface(gpu_addr, params);
-        bool modified = false;
+    ImageView& depth_buffer = slot_image_views[depth_buffer_id];
+    runtime.InvalidateDepthBuffer(depth_buffer);
+}

-        for (auto& surface : overlaps) {
-            const SurfaceParams& src_params = surface->GetSurfaceParams();
-            if (src_params.target != SurfaceTarget::Texture2D ||
-                src_params.height != params.height ||
-                src_params.block_depth != params.block_depth ||
-                src_params.block_height != params.block_height) {
-                return std::nullopt;
-            }
-            modified |= surface->IsModified();
-
-            const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
-            const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
-            const u32 width = params.width;
-            const u32 height = params.height;
-            const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1);
-            TryCopyImage(surface, new_surface, copy_params);
-        }
-        for (const auto& surface : overlaps) {
-            Unregister(surface);
-        }
-        new_surface->MarkAsModified(modified, Tick());
-        Register(new_surface);
-
-        TView view = new_surface->GetMainView();
-        return std::make_pair(std::move(new_surface), std::move(view));
-    }
+template <class P>
+typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) {
+    // TODO: Properly implement this
+    const auto it = page_table.find(cpu_addr >> PAGE_SHIFT);
+    if (it == page_table.end()) {
+        return nullptr;
+    }
+    const auto& image_ids = it->second;
+    for (const ImageId image_id : image_ids) {
+        const ImageBase& image = slot_images[image_id];
+        if (image.cpu_addr != cpu_addr) {
+            continue;
+        }
+        if (image.image_view_ids.empty()) {
+            continue;
+        }
+        return &slot_image_views[image.image_view_ids.at(0)];
+    }
+    return nullptr;
+}

-    /**
-     * Gets the starting address and parameters of a candidate surface and tries
-     * to find a matching surface within the cache. This is done in 3 big steps:
-     *
-     * 1. Check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2.
-     *
-     * 2. Check if there are any overlaps at all, if there are none, we just load the texture from
-     * memory else we move to step 3.
-     *
-     * 3. Consists of figuring out the relationship between the candidate texture and the
-     * overlaps. We divide the scenarios depending if there's 1 or many overlaps. If
-     * there's many, we just try to reconstruct a new surface out of them based on the
-     * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we
-     * have to check if the candidate is a view (layer/mipmap) of the overlap or if the
-     * registered surface is a mipmap/layer of the candidate. In this last case we reconstruct
-     * a new surface.
-     *
-     * @param gpu_addr The starting address of the candidate surface.
-     * @param params The parameters on the candidate surface.
-     * @param preserve_contents Indicates that the new surface should be loaded from memory or
-     * left blank.
-     * @param is_render Whether or not the surface is a render target.
-     **/
-    std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
-                                          const SurfaceParams& params, bool preserve_contents,
-                                          bool is_render) {
-        // Step 1
-        // Check Level 1 Cache for a fast structural match. If candidate surface
-        // matches at certain level we are pretty much done.
-        if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) {
-            TSurface& current_surface = iter->second;
-            const auto topological_result = current_surface->MatchesTopology(params);
-            if (topological_result != MatchTopologyResult::FullMatch) {
-                VectorSurface overlaps{current_surface};
-                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
-                                      topological_result);
-            }
-
-            const auto struct_result = current_surface->MatchesStructure(params);
-            if (struct_result != MatchStructureResult::None) {
-                const auto& old_params = current_surface->GetSurfaceParams();
-                const bool not_3d = params.target != SurfaceTarget::Texture3D &&
-                                    old_params.target != SurfaceTarget::Texture3D;
-                if (not_3d || current_surface->MatchTarget(params.target)) {
-                    if (struct_result == MatchStructureResult::FullMatch) {
-                        return ManageStructuralMatch(current_surface, params, is_render);
-                    } else {
-                        return RebuildSurface(current_surface, params, is_render);
-                    }
-                }
-            }
-        }
+template <class P>
+bool TextureCache<P>::HasUncommittedFlushes() const noexcept {
+    return !uncommitted_downloads.empty();
+}

-        // Step 2
-        // Obtain all possible overlaps in the memory region
-        const std::size_t candidate_size = params.GetGuestSizeInBytes();
-        auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)};
+template <class P>
+bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept {
+    return !committed_downloads.empty() && !committed_downloads.front().empty();
+}

-        // If none are found, we are done. we just load the surface and create it.
-        if (overlaps.empty()) {
-            return InitializeSurface(gpu_addr, params, preserve_contents);
-        }
+template <class P>
+void TextureCache<P>::CommitAsyncFlushes() {
+    // This is intentionally passing the value by copy
+    committed_downloads.push(uncommitted_downloads);
+    uncommitted_downloads.clear();
+}

-        // Step 3
-        // Now we need to figure the relationship between the texture and its overlaps
-        // we do a topological test to ensure we can find some relationship. If it fails
-        // immediately recycle the texture
-        for (const auto& surface : overlaps) {
-            const auto topological_result = surface->MatchesTopology(params);
-            if (topological_result != MatchTopologyResult::FullMatch) {
-                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
-                                      topological_result);
-            }
-        }
+template <class P>
+void TextureCache<P>::PopAsyncFlushes() {
+    if (committed_downloads.empty()) {
+        return;
+    }
+    const std::span<const ImageId> download_ids = committed_downloads.front();
+    if (download_ids.empty()) {
+        committed_downloads.pop();
+        return;
+    }
+    size_t total_size_bytes = 0;
+    for (const ImageId image_id : download_ids) {
+        total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
+    }
+    auto download_map = runtime.MapDownloadBuffer(total_size_bytes);
+    size_t buffer_offset = 0;
+    for (const ImageId image_id : download_ids) {
+        Image& image = slot_images[image_id];
+        const auto copies = FullDownloadCopies(image.info);
+        image.DownloadMemory(download_map, buffer_offset, copies);
+        buffer_offset += image.unswizzled_size_bytes;
+    }
+    // Wait for downloads to finish
+    runtime.Finish();
+
+    buffer_offset = 0;
+    const std::span<u8> download_span = download_map.Span();
+    for (const ImageId image_id : download_ids) {
+        const ImageBase& image = slot_images[image_id];
+        const auto copies = FullDownloadCopies(image.info);
+        const std::span<u8> image_download_span = download_span.subspan(buffer_offset);
+        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span);
+        buffer_offset += image.unswizzled_size_bytes;
+    }
+    committed_downloads.pop();
+}

-        // Manage 3D textures
-        if (params.block_depth > 0) {
-            auto surface =
-                Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
-            if (surface) {
-                return *surface;
-            }
-        }
+template <class P>
+bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
+    bool is_modified = false;
+    ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) {
+        if (False(image.flags & ImageFlagBits::GpuModified)) {
+            return false;
+        }
+        is_modified = true;
+        return true;
+    });
+    return is_modified;
+}

885 // Split cases between 1 overlap or many. 796template <class P>
886 if (overlaps.size() == 1) { 797void TextureCache<P>::RefreshContents(Image& image) {
887 TSurface current_surface = overlaps[0]; 798 if (False(image.flags & ImageFlagBits::CpuModified)) {
888 // First check if the surface is within the overlap. If not, it means 799 // Only upload modified images
889 // two things either the candidate surface is a supertexture of the overlap 800 return;
890 // or they don't match in any known way. 801 }
891 if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { 802 image.flags &= ~ImageFlagBits::CpuModified;
892 const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr); 803 TrackImage(image);
893 if (view) {
894 return *view;
895 }
896 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
897 MatchTopologyResult::FullMatch);
898 }
899 // Now we check if the candidate is a mipmap/layer of the overlap
900 std::optional<TView> view =
901 current_surface->EmplaceView(params, gpu_addr, candidate_size);
902 if (view) {
903 const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
904 if (is_mirage) {
905 // On a mirage view, we need to recreate the surface under this new view
906 // and then obtain a view again.
907 SurfaceParams new_params = current_surface->GetSurfaceParams();
908 const u32 wh = SurfaceParams::ConvertWidth(
909 new_params.width, new_params.pixel_format, params.pixel_format);
910 const u32 hh = SurfaceParams::ConvertHeight(
911 new_params.height, new_params.pixel_format, params.pixel_format);
912 new_params.width = wh;
913 new_params.height = hh;
914 new_params.pixel_format = params.pixel_format;
915 std::pair<TSurface, TView> pair =
916 RebuildSurface(current_surface, new_params, is_render);
917 std::optional<TView> mirage_view =
918 pair.first->EmplaceView(params, gpu_addr, candidate_size);
919 if (mirage_view)
920 return {pair.first, *mirage_view};
921 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
922 MatchTopologyResult::FullMatch);
923 }
924 return {current_surface, *view};
925 }
926 } else {
927 // If there are many overlaps, odds are they are subtextures of the candidate
928 // surface. We try to construct a new surface based on the candidate parameters,
929 // using the overlaps. If a single overlap fails, this will fail.
930 std::optional<std::pair<TSurface, TView>> view =
931 TryReconstructSurface(overlaps, params, gpu_addr);
932 if (view) {
933 return *view;
934 }
935 }
936 // We failed all the tests, recycle the overlaps into a new texture.
937 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
938 MatchTopologyResult::FullMatch);
939 }
940
941 /**
942 * Gets the starting address and parameters of a candidate surface and tries to find a
943 * matching surface within the cache that's similar to it. If there are many textures
944 * or the texture found if entirely incompatible, it will fail. If no texture is found, the
945 * blit will be unsuccessful.
946 *
947 * @param gpu_addr The starting address of the candidate surface.
948 * @param params The parameters on the candidate surface.
949 **/
950 Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
951 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
952
953 if (!cpu_addr) {
954 Deduction result{};
955 result.type = DeductionType::DeductionFailed;
956 return result;
957 }
958 804
959 if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) { 805 if (image.info.num_samples > 1) {
960 TSurface& current_surface = iter->second; 806 LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
961 const auto topological_result = current_surface->MatchesTopology(params); 807 return;
962 if (topological_result != MatchTopologyResult::FullMatch) { 808 }
963 Deduction result{}; 809 auto map = runtime.MapUploadBuffer(MapSizeBytes(image));
964 result.type = DeductionType::DeductionFailed; 810 UploadImageContents(image, map, 0);
965 return result; 811 runtime.InsertUploadMemoryBarrier();
966 } 812}
967 const auto struct_result = current_surface->MatchesStructure(params);
968 if (struct_result != MatchStructureResult::None &&
969 current_surface->MatchTarget(params.target)) {
970 Deduction result{};
971 result.type = DeductionType::DeductionComplete;
972 result.surface = current_surface;
973 return result;
974 }
975 }
976 813
977 const std::size_t candidate_size = params.GetGuestSizeInBytes(); 814template <class P>
978 auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)}; 815template <typename MapBuffer>
816void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) {
817 const std::span<u8> mapped_span = map.Span().subspan(buffer_offset);
818 const GPUVAddr gpu_addr = image.gpu_addr;
819
820 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
821 gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
822 const auto uploads = FullUploadSwizzles(image.info);
823 runtime.AccelerateImageUpload(image, map, buffer_offset, uploads);
824 } else if (True(image.flags & ImageFlagBits::Converted)) {
825 std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
826 auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
827 ConvertImage(unswizzled_data, image.info, mapped_span, copies);
828 image.UploadMemory(map, buffer_offset, copies);
829 } else if (image.info.type == ImageType::Buffer) {
830 const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)};
831 image.UploadMemory(map, buffer_offset, copies);
832 } else {
833 const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
834 image.UploadMemory(map, buffer_offset, copies);
835 }
836}
979 837
980 if (overlaps.empty()) { 838template <class P>
981 Deduction result{}; 839ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
982 result.type = DeductionType::DeductionIncomplete; 840 if (!IsValidAddress(gpu_memory, config)) {
983 return result; 841 return NULL_IMAGE_VIEW_ID;
984 } 842 }
843 const auto [pair, is_new] = image_views.try_emplace(config);
844 ImageViewId& image_view_id = pair->second;
845 if (is_new) {
846 image_view_id = CreateImageView(config);
847 }
848 return image_view_id;
849}
985 850
986 if (overlaps.size() > 1) { 851template <class P>
987 Deduction result{}; 852ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) {
988 result.type = DeductionType::DeductionFailed; 853 const ImageInfo info(config);
989 return result; 854 const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride;
990 } else { 855 const ImageId image_id = FindOrInsertImage(info, image_gpu_addr);
991 Deduction result{}; 856 if (!image_id) {
992 result.type = DeductionType::DeductionComplete; 857 return NULL_IMAGE_VIEW_ID;
993 result.surface = overlaps[0];
994 return result;
995 }
996 } 858 }
859 ImageBase& image = slot_images[image_id];
860 const SubresourceBase base = image.TryFindBase(config.Address()).value();
861 ASSERT(base.level == 0);
862 const ImageViewInfo view_info(config, base.layer);
863 const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info);
864 ImageViewBase& image_view = slot_image_views[image_view_id];
865 image_view.flags |= ImageViewFlagBits::Strong;
866 image.flags |= ImageFlagBits::Strong;
867 return image_view_id;
868}
997 869
998 /** 870template <class P>
999 * Gets a null surface based on a target texture. 871ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
1000 * @param target The target of the null surface. 872 RelaxedOptions options) {
1001 */ 873 if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) {
1002 TView GetNullSurface(SurfaceTarget target) { 874 return image_id;
1003 const u32 i_target = static_cast<u32>(target); 875 }
1004 if (const auto it = invalid_cache.find(i_target); it != invalid_cache.end()) { 876 return InsertImage(info, gpu_addr, options);
1005 return it->second->GetMainView(); 877}
1006 } 878
1007 SurfaceParams params{}; 879template <class P>
1008 params.target = target; 880ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
1009 params.is_tiled = false; 881 RelaxedOptions options) {
1010 params.srgb_conversion = false; 882 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
1011 params.is_layered = 883 if (!cpu_addr) {
1012 target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray || 884 return ImageId{};
1013 target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray; 885 }
1014 params.block_width = 0; 886 ImageId image_id;
1015 params.block_height = 0; 887 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
1016 params.block_depth = 0; 888 if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
1017 params.tile_width_spacing = 1; 889 const bool strict_size = False(options & RelaxedOptions::Size) &&
1018 params.width = 1; 890 True(existing_image.flags & ImageFlagBits::Strong);
1019 params.height = 1; 891 const ImageInfo& existing = existing_image.info;
1020 params.depth = 1; 892 if (existing_image.gpu_addr == gpu_addr && existing.type == info.type &&
1021 if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) { 893 existing.pitch == info.pitch &&
1022 params.depth = 6; 894 IsPitchLinearSameSize(existing, info, strict_size) &&
1023 } 895 IsViewCompatible(existing.format, info.format)) {
1024 params.pitch = 4; 896 image_id = existing_image_id;
1025 params.num_levels = 1; 897 return true;
1026 params.emulated_levels = 1; 898 }
1027 params.pixel_format = VideoCore::Surface::PixelFormat::R8_UNORM; 899 } else if (IsSubresource(info, existing_image, gpu_addr, options)) {
1028 params.type = VideoCore::Surface::SurfaceType::ColorTexture; 900 image_id = existing_image_id;
1029 auto surface = CreateSurface(0ULL, params); 901 return true;
1030 invalid_memory.resize(surface->GetHostSizeInBytes(), 0U);
1031 surface->UploadTexture(invalid_memory);
1032 surface->MarkAsModified(false, Tick());
1033 invalid_cache.emplace(i_target, surface);
1034 return surface->GetMainView();
1035 }
1036
1037 /**
1038 * Gets the a source and destination starting address and parameters,
1039 * and tries to deduce if they are supposed to be depth textures. If so, their
1040 * parameters are modified and fixed into so.
1041 *
1042 * @param src_params The parameters of the candidate surface.
1043 * @param dst_params The parameters of the destination surface.
1044 * @param src_gpu_addr The starting address of the candidate surface.
1045 * @param dst_gpu_addr The starting address of the destination surface.
1046 **/
1047 void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
1048 const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
1049 auto deduced_src = DeduceSurface(src_gpu_addr, src_params);
1050 auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params);
1051 if (deduced_src.Failed() || deduced_dst.Failed()) {
1052 return;
1053 } 902 }
903 return false;
904 };
905 ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda);
906 return image_id;
907}
1054 908
1055 const bool incomplete_src = deduced_src.Incomplete(); 909template <class P>
1056 const bool incomplete_dst = deduced_dst.Incomplete(); 910ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
911 RelaxedOptions options) {
912 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
913 ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
914 const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
915 const Image& image = slot_images[image_id];
916 // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different
917 const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr);
918 if (is_new) {
919 it->second = slot_image_allocs.insert();
920 }
921 slot_image_allocs[it->second].images.push_back(image_id);
922 return image_id;
923}
1057 924
1058 if (incomplete_src && incomplete_dst) { 925template <class P>
926ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) {
927 ImageInfo new_info = info;
928 const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
929 std::vector<ImageId> overlap_ids;
930 std::vector<ImageId> left_aliased_ids;
931 std::vector<ImageId> right_aliased_ids;
932 ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
933 if (info.type != overlap.info.type) {
1059 return; 934 return;
1060 } 935 }
1061 936 if (info.type == ImageType::Linear) {
1062 const bool any_incomplete = incomplete_src || incomplete_dst; 937 if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
1063 938 // Alias linear images with the same pitch
1064 if (!any_incomplete) { 939 left_aliased_ids.push_back(overlap_id);
1065 if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) {
1066 return;
1067 }
1068 } else {
1069 if (incomplete_src && !(deduced_dst.IsDepth())) {
1070 return;
1071 }
1072
1073 if (incomplete_dst && !(deduced_src.IsDepth())) {
1074 return;
1075 } 940 }
941 return;
942 }
943 const auto solution = ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, true);
944 if (solution) {
945 gpu_addr = solution->gpu_addr;
946 cpu_addr = solution->cpu_addr;
947 new_info.resources = solution->resources;
948 overlap_ids.push_back(overlap_id);
949 return;
950 }
951 static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
952 const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
953 if (IsSubresource(new_info, overlap, gpu_addr, options)) {
954 left_aliased_ids.push_back(overlap_id);
955 } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options)) {
956 right_aliased_ids.push_back(overlap_id);
1076 } 957 }
958 });
959 const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
960 Image& new_image = slot_images[new_image_id];
1077 961
1078 const auto inherit_format = [](SurfaceParams& to, TSurface from) { 962 // TODO: Only upload what we need
1079 const SurfaceParams& params = from->GetSurfaceParams(); 963 RefreshContents(new_image);
1080 to.pixel_format = params.pixel_format; 964
1081 to.type = params.type; 965 for (const ImageId overlap_id : overlap_ids) {
1082 }; 966 Image& overlap = slot_images[overlap_id];
1083 // Now we got the cases where one or both is Depth and the other is not known 967 if (overlap.info.num_samples != new_image.info.num_samples) {
1084 if (!incomplete_src) { 968 LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");
1085 inherit_format(src_params, deduced_src.surface);
1086 } else { 969 } else {
1087 inherit_format(src_params, deduced_dst.surface); 970 const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
971 const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base);
972 runtime.CopyImage(new_image, overlap, copies);
1088 } 973 }
1089 if (!incomplete_dst) { 974 if (True(overlap.flags & ImageFlagBits::Tracked)) {
1090 inherit_format(dst_params, deduced_dst.surface); 975 UntrackImage(overlap);
1091 } else {
1092 inherit_format(dst_params, deduced_src.surface);
1093 } 976 }
977 UnregisterImage(overlap_id);
978 DeleteImage(overlap_id);
979 }
980 ImageBase& new_image_base = new_image;
981 for (const ImageId aliased_id : right_aliased_ids) {
982 ImageBase& aliased = slot_images[aliased_id];
983 AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
984 }
985 for (const ImageId aliased_id : left_aliased_ids) {
986 ImageBase& aliased = slot_images[aliased_id];
987 AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
1094 } 988 }
989 RegisterImage(new_image_id);
990 return new_image_id;
991}
1095 992
1096 std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, 993template <class P>
1097 bool preserve_contents) { 994typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
1098 auto new_surface{GetUncachedSurface(gpu_addr, params)}; 995 const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) {
1099 Register(new_surface); 996 static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples;
1100 if (preserve_contents) { 997 const GPUVAddr dst_addr = dst.Address();
1101 LoadSurface(new_surface); 998 const GPUVAddr src_addr = src.Address();
1102 } 999 ImageInfo dst_info(dst);
1103 return {new_surface, new_surface->GetMainView()}; 1000 ImageInfo src_info(src);
1001 ImageId dst_id;
1002 ImageId src_id;
1003 do {
1004 has_deleted_images = false;
1005 dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS);
1006 src_id = FindImage(src_info, src_addr, FIND_OPTIONS);
1007 const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr;
1008 const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr;
1009 DeduceBlitImages(dst_info, src_info, dst_image, src_image);
1010 if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) {
1011 continue;
1012 }
1013 if (!dst_id) {
1014 dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{});
1015 }
1016 if (!src_id) {
1017 src_id = InsertImage(src_info, src_addr, RelaxedOptions{});
1018 }
1019 } while (has_deleted_images);
1020 return BlitImages{
1021 .dst_id = dst_id,
1022 .src_id = src_id,
1023 .dst_format = dst_info.format,
1024 .src_format = src_info.format,
1025 };
1026}
1027
1028template <class P>
1029SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
1030 if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
1031 return NULL_SAMPLER_ID;
1032 }
1033 const auto [pair, is_new] = samplers.try_emplace(config);
1034 if (is_new) {
1035 pair->second = slot_samplers.insert(runtime, config);
1104 } 1036 }
1037 return pair->second;
1038}
1105 1039
1106 void LoadSurface(const TSurface& surface) { 1040template <class P>
1107 staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); 1041ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
1108 surface->LoadBuffer(gpu_memory, staging_cache); 1042 const auto& regs = maxwell3d.regs;
1109 surface->UploadTexture(staging_cache.GetBuffer(0)); 1043 if (index >= regs.rt_control.count) {
1110 surface->MarkAsModified(false, Tick()); 1044 return ImageViewId{};
1045 }
1046 const auto& rt = regs.rt[index];
1047 const GPUVAddr gpu_addr = rt.Address();
1048 if (gpu_addr == 0) {
1049 return ImageViewId{};
1050 }
1051 if (rt.format == Tegra::RenderTargetFormat::NONE) {
1052 return ImageViewId{};
1111 } 1053 }
1054 const ImageInfo info(regs, index);
1055 return FindRenderTargetView(info, gpu_addr, is_clear);
1056}
1112 1057
1113 void FlushSurface(const TSurface& surface) { 1058template <class P>
1114 if (!surface->IsModified()) { 1059ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
1115 return; 1060 const auto& regs = maxwell3d.regs;
1116 } 1061 if (!regs.zeta_enable) {
1117 staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); 1062 return ImageViewId{};
1118 surface->DownloadTexture(staging_cache.GetBuffer(0)); 1063 }
1119 surface->FlushBuffer(gpu_memory, staging_cache); 1064 const GPUVAddr gpu_addr = regs.zeta.Address();
1120 surface->MarkAsModified(false, Tick()); 1065 if (gpu_addr == 0) {
1121 } 1066 return ImageViewId{};
1122
1123 void RegisterInnerCache(TSurface& surface) {
1124 const VAddr cpu_addr = surface->GetCpuAddr();
1125 VAddr start = cpu_addr >> registry_page_bits;
1126 const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
1127 l1_cache[cpu_addr] = surface;
1128 while (start <= end) {
1129 registry[start].push_back(surface);
1130 start++;
1131 }
1132 } 1067 }
1068 const ImageInfo info(regs);
1069 return FindRenderTargetView(info, gpu_addr, is_clear);
1070}
1133 1071
1134 void UnregisterInnerCache(TSurface& surface) { 1072template <class P>
1135 const VAddr cpu_addr = surface->GetCpuAddr(); 1073ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
1136 VAddr start = cpu_addr >> registry_page_bits; 1074 bool is_clear) {
1137 const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; 1075 const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{};
1138 l1_cache.erase(cpu_addr); 1076 const ImageId image_id = FindOrInsertImage(info, gpu_addr, options);
1139 while (start <= end) { 1077 if (!image_id) {
1140 auto& reg{registry[start]}; 1078 return NULL_IMAGE_VIEW_ID;
1141 reg.erase(std::find(reg.begin(), reg.end(), surface)); 1079 }
1142 start++; 1080 Image& image = slot_images[image_id];
1143 } 1081 const ImageViewType view_type = RenderTargetImageViewType(info);
1082 SubresourceBase base;
1083 if (image.info.type == ImageType::Linear) {
1084 base = SubresourceBase{.level = 0, .layer = 0};
1085 } else {
1086 base = image.TryFindBase(gpu_addr).value();
1144 } 1087 }
1088 const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers;
1089 const SubresourceRange range{
1090 .base = base,
1091 .extent = {.levels = 1, .layers = layers},
1092 };
1093 return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range));
1094}
1145 1095
1146 VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { 1096template <class P>
1147 if (size == 0) { 1097template <typename Func>
1148 return {}; 1098void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) {
1099 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
1100 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
1101 boost::container::small_vector<ImageId, 32> images;
1102 ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
1103 const auto it = page_table.find(page);
1104 if (it == page_table.end()) {
1105 if constexpr (BOOL_BREAK) {
1106 return false;
1107 } else {
1108 return;
1109 }
1149 } 1110 }
1150 const VAddr cpu_addr_end = cpu_addr + size; 1111 for (const ImageId image_id : it->second) {
1151 const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; 1112 Image& image = slot_images[image_id];
1152 VectorSurface surfaces; 1113 if (True(image.flags & ImageFlagBits::Picked)) {
1153 for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) {
1154 const auto it = registry.find(start);
1155 if (it == registry.end()) {
1156 continue; 1114 continue;
1157 } 1115 }
1158 for (auto& surface : it->second) { 1116 if (!image.Overlaps(cpu_addr, size)) {
1159 if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) { 1117 continue;
1160 continue; 1118 }
1119 image.flags |= ImageFlagBits::Picked;
1120 images.push_back(image_id);
1121 if constexpr (BOOL_BREAK) {
1122 if (func(image_id, image)) {
1123 return true;
1161 } 1124 }
1162 surface->MarkAsPicked(true); 1125 } else {
1163 surfaces.push_back(surface); 1126 func(image_id, image);
1164 } 1127 }
1165 } 1128 }
1166 for (auto& surface : surfaces) { 1129 if constexpr (BOOL_BREAK) {
1167 surface->MarkAsPicked(false); 1130 return false;
1168 } 1131 }
1169 return surfaces; 1132 });
1133 for (const ImageId image_id : images) {
1134 slot_images[image_id].flags &= ~ImageFlagBits::Picked;
1170 } 1135 }
1136}
1171 1137
1172 void ReserveSurface(const SurfaceParams& params, TSurface surface) { 1138template <class P>
1173 surface_reserve[params].push_back(std::move(surface)); 1139ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
1140 Image& image = slot_images[image_id];
1141 if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
1142 return image_view_id;
1174 } 1143 }
1144 const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image);
1145 image.InsertView(info, image_view_id);
1146 return image_view_id;
1147}
1148
1149template <class P>
1150void TextureCache<P>::RegisterImage(ImageId image_id) {
1151 ImageBase& image = slot_images[image_id];
1152 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
1153 "Trying to register an already registered image");
1154 image.flags |= ImageFlagBits::Registered;
1155 ForEachPage(image.cpu_addr, image.guest_size_bytes,
1156 [this, image_id](u64 page) { page_table[page].push_back(image_id); });
1157}
1175 1158
1176 TSurface TryGetReservedSurface(const SurfaceParams& params) { 1159template <class P>
1177 auto search{surface_reserve.find(params)}; 1160void TextureCache<P>::UnregisterImage(ImageId image_id) {
1178 if (search == surface_reserve.end()) { 1161 Image& image = slot_images[image_id];
1179 return {}; 1162 ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
1163 "Trying to unregister an already registered image");
1164 image.flags &= ~ImageFlagBits::Registered;
1165 ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
1166 const auto page_it = page_table.find(page);
1167 if (page_it == page_table.end()) {
1168 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_SHIFT);
1169 return;
1180 } 1170 }
1181 for (auto& surface : search->second) { 1171 std::vector<ImageId>& image_ids = page_it->second;
1182 if (!surface->IsRegistered()) { 1172 const auto vector_it = std::ranges::find(image_ids, image_id);
1183 return surface; 1173 if (vector_it == image_ids.end()) {
1184 } 1174 UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_SHIFT);
1175 return;
1185 } 1176 }
1186 return {}; 1177 image_ids.erase(vector_it);
1187 } 1178 });
1179}
1188 1180
1189 /// Try to do an image copy logging when formats are incompatible. 1181template <class P>
1190 void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) { 1182void TextureCache<P>::TrackImage(ImageBase& image) {
1191 const SurfaceParams& src_params = src->GetSurfaceParams(); 1183 ASSERT(False(image.flags & ImageFlagBits::Tracked));
1192 const SurfaceParams& dst_params = dst->GetSurfaceParams(); 1184 image.flags |= ImageFlagBits::Tracked;
1193 if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) { 1185 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
1194 LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}", dst_params.pixel_format, 1186}
1195 src_params.pixel_format); 1187
1196 return; 1188template <class P>
1189void TextureCache<P>::UntrackImage(ImageBase& image) {
1190 ASSERT(True(image.flags & ImageFlagBits::Tracked));
1191 image.flags &= ~ImageFlagBits::Tracked;
1192 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
1193}
1194
1195template <class P>
1196void TextureCache<P>::DeleteImage(ImageId image_id) {
1197 ImageBase& image = slot_images[image_id];
1198 const GPUVAddr gpu_addr = image.gpu_addr;
1199 const auto alloc_it = image_allocs_table.find(gpu_addr);
1200 if (alloc_it == image_allocs_table.end()) {
1201 UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}",
1202 gpu_addr);
1203 return;
1204 }
1205 const ImageAllocId alloc_id = alloc_it->second;
1206 std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images;
1207 const auto alloc_image_it = std::ranges::find(alloc_images, image_id);
1208 if (alloc_image_it == alloc_images.end()) {
1209 UNREACHABLE_MSG("Trying to delete an image that does not exist");
1210 return;
1211 }
1212 ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
1213 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
1214
1215 // Mark render targets as dirty
1216 auto& dirty = maxwell3d.dirty.flags;
1217 dirty[Dirty::RenderTargets] = true;
1218 dirty[Dirty::ZetaBuffer] = true;
1219 for (size_t rt = 0; rt < NUM_RT; ++rt) {
1220 dirty[Dirty::ColorBuffer0 + rt] = true;
1221 }
1222 const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
1223 for (const ImageViewId image_view_id : image_view_ids) {
1224 std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{});
1225 if (render_targets.depth_buffer_id == image_view_id) {
1226 render_targets.depth_buffer_id = ImageViewId{};
1197 } 1227 }
1198 ImageCopy(src, dst, copy);
1199 } 1228 }
1229 RemoveImageViewReferences(image_view_ids);
1230 RemoveFramebuffers(image_view_ids);
1231
1232 for (const AliasedImage& alias : image.aliased_images) {
1233 ImageBase& other_image = slot_images[alias.id];
1234 [[maybe_unused]] const size_t num_removed_aliases =
1235 std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) {
1236 return other_alias.id == image_id;
1237 });
1238 ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
1239 num_removed_aliases);
1240 }
1241 for (const ImageViewId image_view_id : image_view_ids) {
1242 sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
1243 slot_image_views.erase(image_view_id);
1244 }
1245 sentenced_images.Push(std::move(slot_images[image_id]));
1246 slot_images.erase(image_id);
1200 1247
1201 constexpr PixelFormat GetSiblingFormat(PixelFormat format) const { 1248 alloc_images.erase(alloc_image_it);
1202 return siblings_table[static_cast<std::size_t>(format)]; 1249 if (alloc_images.empty()) {
1250 image_allocs_table.erase(alloc_it);
1203 } 1251 }
1252 if constexpr (ENABLE_VALIDATION) {
1253 std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
1254 std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
1255 }
1256 graphics_image_table.Invalidate();
1257 compute_image_table.Invalidate();
1258 has_deleted_images = true;
1259}
1204 1260
1205 /// Returns true the shader sampler entry is compatible with the TIC texture type. 1261template <class P>
1206 static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type, 1262void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
1207 const VideoCommon::Shader::Sampler& entry) { 1263 auto it = image_views.begin();
1208 const auto shader_type = entry.type; 1264 while (it != image_views.end()) {
1209 switch (tic_type) { 1265 const auto found = std::ranges::find(removed_views, it->second);
1210 case Tegra::Texture::TextureType::Texture1D: 1266 if (found != removed_views.end()) {
1211 case Tegra::Texture::TextureType::Texture1DArray: 1267 it = image_views.erase(it);
1212 return shader_type == Tegra::Shader::TextureType::Texture1D; 1268 } else {
1213 case Tegra::Texture::TextureType::Texture1DBuffer: 1269 ++it;
1214 // TODO(Rodrigo): Assume as valid for now
1215 return true;
1216 case Tegra::Texture::TextureType::Texture2D:
1217 case Tegra::Texture::TextureType::Texture2DNoMipmap:
1218 return shader_type == Tegra::Shader::TextureType::Texture2D;
1219 case Tegra::Texture::TextureType::Texture2DArray:
1220 return shader_type == Tegra::Shader::TextureType::Texture2D ||
1221 shader_type == Tegra::Shader::TextureType::TextureCube;
1222 case Tegra::Texture::TextureType::Texture3D:
1223 return shader_type == Tegra::Shader::TextureType::Texture3D;
1224 case Tegra::Texture::TextureType::TextureCubeArray:
1225 case Tegra::Texture::TextureType::TextureCubemap:
1226 if (shader_type == Tegra::Shader::TextureType::TextureCube) {
1227 return true;
1228 }
1229 return shader_type == Tegra::Shader::TextureType::Texture2D && entry.is_array;
1230 } 1270 }
1231 UNREACHABLE();
1232 return true;
1233 } 1271 }
1272}
1234 1273
1235 struct FramebufferTargetInfo { 1274template <class P>
1236 TSurface target; 1275void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_views) {
1237 TView view; 1276 auto it = framebuffers.begin();
1238 }; 1277 while (it != framebuffers.end()) {
1239 1278 if (it->first.Contains(removed_views)) {
1240 void AsyncFlushSurface(TSurface& surface) { 1279 it = framebuffers.erase(it);
1241 if (!uncommitted_flushes) { 1280 } else {
1242 uncommitted_flushes = std::make_shared<std::list<TSurface>>(); 1281 ++it;
1243 } 1282 }
1244 uncommitted_flushes->push_back(surface);
1245 } 1283 }
1284}
1246 1285
1247 VideoCore::RasterizerInterface& rasterizer; 1286template <class P>
1248 Tegra::Engines::Maxwell3D& maxwell3d; 1287void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
1249 Tegra::MemoryManager& gpu_memory; 1288 image.flags |= ImageFlagBits::GpuModified;
1250 1289 image.modification_tick = ++modification_tick;
1251 FormatLookupTable format_lookup_table; 1290}
1252 FormatCompatibility format_compatibility;
1253
1254 u64 ticks{};
1255
1256 // Guards the cache for protection conflicts.
1257 bool guard_render_targets{};
1258 bool guard_samplers{};
1259
1260 // The siblings table is for formats that can inter exchange with one another
1261 // without causing issues. This is only valid when a conflict occurs on a non
1262 // rendering use.
1263 std::array<PixelFormat, static_cast<std::size_t>(PixelFormat::Max)> siblings_table;
1264
1265 // The internal Cache is different for the Texture Cache. It's based on buckets
1266 // of 1MB. This fits better for the purpose of this cache as textures are normaly
1267 // large in size.
1268 static constexpr u64 registry_page_bits{20};
1269 static constexpr u64 registry_page_size{1 << registry_page_bits};
1270 std::unordered_map<VAddr, std::vector<TSurface>> registry;
1271 1291
1272 static constexpr u32 DEPTH_RT = 8; 1292template <class P>
1273 static constexpr u32 NO_RT = 0xFFFFFFFF; 1293void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
1294 boost::container::small_vector<const AliasedImage*, 1> aliased_images;
1295 ImageBase& image = slot_images[image_id];
1296 u64 most_recent_tick = image.modification_tick;
1297 for (const AliasedImage& aliased : image.aliased_images) {
1298 ImageBase& aliased_image = slot_images[aliased.id];
1299 if (image.modification_tick < aliased_image.modification_tick) {
1300 most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
1301 aliased_images.push_back(&aliased);
1302 }
1303 }
1304 if (aliased_images.empty()) {
1305 return;
1306 }
1307 image.modification_tick = most_recent_tick;
1308 std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) {
1309 const ImageBase& lhs_image = slot_images[lhs->id];
1310 const ImageBase& rhs_image = slot_images[rhs->id];
1311 return lhs_image.modification_tick < rhs_image.modification_tick;
1312 });
1313 for (const AliasedImage* const aliased : aliased_images) {
1314 CopyImage(image_id, aliased->id, aliased->copies);
1315 }
1316}
1274 1317
1275 // The L1 Cache is used for fast texture lookup before checking the overlaps 1318template <class P>
1276 // This avoids calculating size and other stuffs. 1319void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
1277 std::unordered_map<VAddr, TSurface> l1_cache; 1320 Image& image = slot_images[image_id];
1321 if (invalidate) {
1322 image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
1323 if (False(image.flags & ImageFlagBits::Tracked)) {
1324 TrackImage(image);
1325 }
1326 } else {
1327 RefreshContents(image);
1328 SynchronizeAliases(image_id);
1329 }
1330 if (is_modification) {
1331 MarkModification(image);
1332 }
1333 image.frame_tick = frame_tick;
1334}
1278 1335
1279 /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have 1336template <class P>
1280 /// previously been used. This is to prevent surfaces from being constantly created and 1337void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification,
1281 /// destroyed when used with different surface parameters. 1338 bool invalidate) {
1282 std::unordered_map<SurfaceParams, std::vector<TSurface>> surface_reserve; 1339 if (!image_view_id) {
1283 std::array<FramebufferTargetInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> 1340 return;
1284 render_targets; 1341 }
1285 FramebufferTargetInfo depth_buffer; 1342 const ImageViewBase& image_view = slot_image_views[image_view_id];
1343 PrepareImage(image_view.image_id, is_modification, invalidate);
1344}
1286 1345
1287 std::vector<TSurface> sampled_textures; 1346template <class P>
1347void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) {
1348 Image& dst = slot_images[dst_id];
1349 Image& src = slot_images[src_id];
1350 const auto dst_format_type = GetFormatType(dst.info.format);
1351 const auto src_format_type = GetFormatType(src.info.format);
1352 if (src_format_type == dst_format_type) {
1353 if constexpr (HAS_EMULATED_COPIES) {
1354 if (!runtime.CanImageBeCopied(dst, src)) {
1355 return runtime.EmulateCopyImage(dst, src, copies);
1356 }
1357 }
1358 return runtime.CopyImage(dst, src, copies);
1359 }
1360 UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);
1361 UNIMPLEMENTED_IF(src.info.type != ImageType::e2D);
1362 for (const ImageCopy& copy : copies) {
1363 UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1);
1364 UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1);
1365 UNIMPLEMENTED_IF(copy.src_offset != Offset3D{});
1366 UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{});
1367
1368 const SubresourceBase dst_base{
1369 .level = copy.dst_subresource.base_level,
1370 .layer = copy.dst_subresource.base_layer,
1371 };
1372 const SubresourceBase src_base{
1373 .level = copy.src_subresource.base_level,
1374 .layer = copy.src_subresource.base_layer,
1375 };
1376 const SubresourceExtent dst_extent{.levels = 1, .layers = 1};
1377 const SubresourceExtent src_extent{.levels = 1, .layers = 1};
1378 const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent};
1379 const SubresourceRange src_range{.base = src_base, .extent = src_extent};
1380 const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range);
1381 const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range);
1382 const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
1383 Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
1384 const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info);
1385 ImageView& dst_view = slot_image_views[dst_view_id];
1386 ImageView& src_view = slot_image_views[src_view_id];
1387 [[maybe_unused]] const Extent3D expected_size{
1388 .width = std::min(dst_view.size.width, src_view.size.width),
1389 .height = std::min(dst_view.size.height, src_view.size.height),
1390 .depth = std::min(dst_view.size.depth, src_view.size.depth),
1391 };
1392 UNIMPLEMENTED_IF(copy.extent != expected_size);
1288 1393
1289 /// This cache stores null surfaces in order to be used as a placeholder 1394 runtime.ConvertImage(dst_framebuffer, dst_view, src_view);
1290 /// for invalid texture calls. 1395 }
1291 std::unordered_map<u32, TSurface> invalid_cache; 1396}
1292 std::vector<u8> invalid_memory;
1293 1397
1294 std::list<TSurface> marked_for_unregister; 1398template <class P>
1399void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) {
1400 if (*old_id == new_id) {
1401 return;
1402 }
1403 if (*old_id) {
1404 const ImageViewBase& old_view = slot_image_views[*old_id];
1405 if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
1406 uncommitted_downloads.push_back(old_view.image_id);
1407 }
1408 }
1409 *old_id = new_id;
1410}
1295 1411
1296 std::shared_ptr<std::list<TSurface>> uncommitted_flushes{}; 1412template <class P>
1297 std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes; 1413std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage(
1414 ImageId image_id, const ImageViewInfo& view_info) {
1415 const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info);
1416 const ImageBase& image = slot_images[image_id];
1417 const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture;
1418 const ImageViewId color_view_id = is_color ? view_id : ImageViewId{};
1419 const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id;
1420 const Extent3D extent = MipSize(image.info.size, view_info.range.base.level);
1421 const u32 num_samples = image.info.num_samples;
1422 const auto [samples_x, samples_y] = SamplesLog2(num_samples);
1423 const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{
1424 .color_buffer_ids = {color_view_id},
1425 .depth_buffer_id = depth_view_id,
1426 .size = {extent.width >> samples_x, extent.height >> samples_y},
1427 });
1428 return {framebuffer_id, view_id};
1429}
1298 1430
1299 StagingCache staging_cache; 1431template <class P>
1300 std::recursive_mutex mutex; 1432bool TextureCache<P>::IsFullClear(ImageViewId id) {
1301}; 1433 if (!id) {
1434 return true;
1435 }
1436 const ImageViewBase& image_view = slot_image_views[id];
1437 const ImageBase& image = slot_images[image_view.image_id];
1438 const Extent3D size = image_view.size;
1439 const auto& regs = maxwell3d.regs;
1440 const auto& scissor = regs.scissor_test[0];
1441 if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
1442 // Images with multiple resources can't be cleared in a single call
1443 return false;
1444 }
1445 if (regs.clear_flags.scissor == 0) {
1446 // If scissor testing is disabled, the clear is always full
1447 return true;
1448 }
1449 // Make sure the clear covers all texels in the subresource
1450 return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width &&
1451 scissor.max_y >= size.height;
1452}
1302 1453
1303} // namespace VideoCommon 1454} // namespace VideoCommon
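The page-table walk above (RegisterImage, UnregisterImage and ForEachImageInRegion) is the backbone of the new cache's lookups: every image is appended to the bucket of each page it touches, and a region query only scans those buckets. A minimal standalone sketch of the idea, with illustrative names and an assumed page granularity rather than the commit's exact constants:

#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

using ImageId = std::uint32_t;

// Assumed granularity for this sketch; the real cache derives its own shift.
constexpr std::uint64_t PAGE_SHIFT = 12;

std::unordered_map<std::uint64_t, std::vector<ImageId>> page_table;

// Visit every page index overlapped by [addr, addr + size); size is assumed non-zero
template <typename Func>
void ForEachPage(std::uint64_t addr, std::size_t size, Func&& func) {
    const std::uint64_t page_end = (addr + size - 1) >> PAGE_SHIFT;
    for (std::uint64_t page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
        func(page);
    }
}

// Registration appends the image to every page bucket it touches, mirroring
// RegisterImage above; queries then only scan the buckets of the queried region.
void RegisterImage(ImageId image_id, std::uint64_t cpu_addr, std::size_t guest_size_bytes) {
    ForEachPage(cpu_addr, guest_size_bytes,
                [&](std::uint64_t page) { page_table[page].push_back(image_id); });
}

Because an image can span several buckets, ForEachImageInRegion additionally marks visited images with a Picked flag so each image is reported exactly once per query.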
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
new file mode 100644
index 000000000..2ad2d72a6
--- /dev/null
+++ b/src/video_core/texture_cache/types.h
@@ -0,0 +1,140 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/texture_cache/slot_vector.h"

namespace VideoCommon {

constexpr size_t NUM_RT = 8;
constexpr size_t MAX_MIP_LEVELS = 14;

constexpr SlotId CORRUPT_ID{0xfffffffe};

using ImageId = SlotId;
using ImageViewId = SlotId;
using ImageAllocId = SlotId;
using SamplerId = SlotId;
using FramebufferId = SlotId;

enum class ImageType : u32 {
    e1D,
    e2D,
    e3D,
    Linear,
    Buffer,
};

enum class ImageViewType : u32 {
    e1D,
    e2D,
    Cube,
    e3D,
    e1DArray,
    e2DArray,
    CubeArray,
    Rect,
    Buffer,
};
constexpr size_t NUM_IMAGE_VIEW_TYPES = 9;

enum class RelaxedOptions : u32 {
    Size = 1 << 0,
    Format = 1 << 1,
    Samples = 1 << 2,
};
DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions)
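// Illustrative sketch, not part of the original header: the macro above generates
// the bitwise operators for RelaxedOptions, and the constexpr True()/False() helpers
// from common_funcs.h are how the cache tests them, e.g. in JoinImages:
//
//   constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
//   static_assert(True(options & RelaxedOptions::Size));
//   static_assert(False(options & RelaxedOptions::Samples));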

struct Offset2D {
    constexpr auto operator<=>(const Offset2D&) const noexcept = default;

    s32 x;
    s32 y;
};

struct Offset3D {
    constexpr auto operator<=>(const Offset3D&) const noexcept = default;

    s32 x;
    s32 y;
    s32 z;
};

struct Extent2D {
    constexpr auto operator<=>(const Extent2D&) const noexcept = default;

    u32 width;
    u32 height;
};

struct Extent3D {
    constexpr auto operator<=>(const Extent3D&) const noexcept = default;

    u32 width;
    u32 height;
    u32 depth;
};

struct SubresourceLayers {
    s32 base_level = 0;
    s32 base_layer = 0;
    s32 num_layers = 1;
};

struct SubresourceBase {
    constexpr auto operator<=>(const SubresourceBase&) const noexcept = default;

    s32 level = 0;
    s32 layer = 0;
};

struct SubresourceExtent {
    constexpr auto operator<=>(const SubresourceExtent&) const noexcept = default;

    s32 levels = 1;
    s32 layers = 1;
};

struct SubresourceRange {
    constexpr auto operator<=>(const SubresourceRange&) const noexcept = default;

    SubresourceBase base;
    SubresourceExtent extent;
};

struct ImageCopy {
    SubresourceLayers src_subresource;
    SubresourceLayers dst_subresource;
    Offset3D src_offset;
    Offset3D dst_offset;
    Extent3D extent;
};

struct BufferImageCopy {
    size_t buffer_offset;
    size_t buffer_size;
    u32 buffer_row_length;
    u32 buffer_image_height;
    SubresourceLayers image_subresource;
    Offset3D image_offset;
    Extent3D image_extent;
};

struct BufferCopy {
    size_t src_offset;
    size_t dst_offset;
    size_t size;
};

struct SwizzleParameters {
    Extent3D num_tiles;
    Extent3D block;
    size_t buffer_offset;
    s32 level;
};

} // namespace VideoCommon
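For a sense of how these plain structs are meant to be used: designated initializers build them field by field, and the defaulted operator<=> (which also generates operator==) is what lets CopyImage above compare offsets directly, as in copy.src_offset != Offset3D{}. A hypothetical full-mip copy between two 256x256 images, with the sizes chosen purely for illustration:

#include "video_core/texture_cache/types.h"

// Hypothetical values for illustration only
constexpr VideoCommon::ImageCopy copy{
    .src_subresource = {.base_level = 0, .base_layer = 0, .num_layers = 1},
    .dst_subresource = {.base_level = 0, .base_layer = 0, .num_layers = 1},
    .src_offset = {.x = 0, .y = 0, .z = 0},
    .dst_offset = {.x = 0, .y = 0, .z = 0},
    .extent = {.width = 256, .height = 256, .depth = 1},
};
static_assert(copy.src_offset == VideoCommon::Offset3D{}); // defaulted <=> also gives ==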
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
new file mode 100644
index 000000000..9ed1fc007
--- /dev/null
+++ b/src/video_core/texture_cache/util.cpp
@@ -0,0 +1,1232 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

// This file contains code from Ryujinx
// A copy of the code can be obtained from https://github.com/Ryujinx/Ryujinx
// The sections using code from Ryujinx are marked with a link to the original version

// MIT License
//
// Copyright (c) Ryujinx Team and Contributors
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
// associated documentation files (the "Software"), to deal in the Software without restriction,
// including without limitation the rights to use, copy, modify, merge, publish, distribute,
// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
#include <algorithm>
#include <array>
#include <numeric>
#include <optional>
#include <span>
#include <vector>

#include "common/alignment.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "video_core/compatible_formats.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/decode_bc4.h"
#include "video_core/texture_cache/format_lookup_table.h"
#include "video_core/texture_cache/formatter.h"
#include "video_core/texture_cache/samples_helper.h"
#include "video_core/texture_cache/util.h"
#include "video_core/textures/astc.h"
#include "video_core/textures/decoders.h"

namespace VideoCommon {

namespace {

using Tegra::Texture::GOB_SIZE;
using Tegra::Texture::GOB_SIZE_SHIFT;
using Tegra::Texture::GOB_SIZE_X;
using Tegra::Texture::GOB_SIZE_X_SHIFT;
using Tegra::Texture::GOB_SIZE_Y;
using Tegra::Texture::GOB_SIZE_Y_SHIFT;
using Tegra::Texture::GOB_SIZE_Z;
using Tegra::Texture::GOB_SIZE_Z_SHIFT;
using Tegra::Texture::MsaaMode;
using Tegra::Texture::SwizzleTexture;
using Tegra::Texture::TextureFormat;
using Tegra::Texture::TextureType;
using Tegra::Texture::TICEntry;
using Tegra::Texture::UnswizzleTexture;
using VideoCore::Surface::BytesPerBlock;
using VideoCore::Surface::DefaultBlockHeight;
using VideoCore::Surface::DefaultBlockWidth;
using VideoCore::Surface::IsCopyCompatible;
using VideoCore::Surface::IsPixelFormatASTC;
using VideoCore::Surface::IsViewCompatible;
using VideoCore::Surface::PixelFormatFromDepthFormat;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
using VideoCore::Surface::SurfaceType;

constexpr u32 CONVERTED_BYTES_PER_BLOCK = BytesPerBlock(PixelFormat::A8B8G8R8_UNORM);

struct LevelInfo {
    Extent3D size;
    Extent3D block;
    Extent2D tile_size;
    u32 bpp_log2;
    u32 tile_width_spacing;
};

[[nodiscard]] constexpr u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension) {
    if (shift == 0) {
        return 0;
    }
    u32 x = unit_factor << (shift - 1);
    if (x >= dimension) {
        while (--shift) {
            x >>= 1;
            if (x < dimension) {
                break;
            }
        }
    }
    return shift;
}

[[nodiscard]] constexpr u32 AdjustMipSize(u32 size, u32 level) {
    return std::max<u32>(size >> level, 1);
}

[[nodiscard]] constexpr Extent3D AdjustMipSize(Extent3D size, s32 level) {
    return Extent3D{
        .width = AdjustMipSize(size.width, level),
        .height = AdjustMipSize(size.height, level),
        .depth = AdjustMipSize(size.depth, level),
    };
}
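
// Illustrative values, not from the original file: mip extents halve per level and
// clamp at one texel, so a 100x60x1 level 0 yields 50x30x1 at level 1 and 25x15x1
// at level 2:
//
//   static_assert(AdjustMipSize(100, 2) == 25);
//   static_assert(AdjustMipSize(60, 2) == 15);
//   static_assert(AdjustMipSize(1, 5) == 1); // never drops below one texel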

[[nodiscard]] Extent3D AdjustSamplesSize(Extent3D size, s32 num_samples) {
    const auto [samples_x, samples_y] = SamplesLog2(num_samples);
    return Extent3D{
        .width = size.width >> samples_x,
        .height = size.height >> samples_y,
        .depth = size.depth,
    };
}

template <u32 GOB_EXTENT>
[[nodiscard]] constexpr u32 AdjustMipBlockSize(u32 num_tiles, u32 block_size, u32 level) {
    do {
        while (block_size > 0 && num_tiles <= (1U << (block_size - 1)) * GOB_EXTENT) {
            --block_size;
        }
    } while (level--);
    return block_size;
}

[[nodiscard]] constexpr Extent3D AdjustMipBlockSize(Extent3D num_tiles, Extent3D block_size,
                                                    u32 level) {
    return {
        .width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level),
        .height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level),
        .depth = AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level),
    };
}

[[nodiscard]] constexpr Extent3D AdjustTileSize(Extent3D size, Extent2D tile_size) {
    return {
        .width = Common::DivCeil(size.width, tile_size.width),
        .height = Common::DivCeil(size.height, tile_size.height),
        .depth = size.depth,
    };
}

[[nodiscard]] constexpr u32 BytesPerBlockLog2(u32 bytes_per_block) {
    return std::countl_zero(bytes_per_block) ^ 0x1F;
}

[[nodiscard]] constexpr u32 BytesPerBlockLog2(PixelFormat format) {
    return BytesPerBlockLog2(BytesPerBlock(format));
}

[[nodiscard]] constexpr u32 NumBlocks(Extent3D size, Extent2D tile_size) {
    const Extent3D num_blocks = AdjustTileSize(size, tile_size);
    return num_blocks.width * num_blocks.height * num_blocks.depth;
}

[[nodiscard]] constexpr u32 AdjustSize(u32 size, u32 level, u32 block_size) {
    return Common::DivCeil(AdjustMipSize(size, level), block_size);
}

[[nodiscard]] constexpr u32 LayerSize(const TICEntry& config, PixelFormat format) {
    return config.Width() * config.Height() * BytesPerBlock(format);
}

[[nodiscard]] constexpr bool HasTwoDimsPerLayer(TextureType type) {
    switch (type) {
    case TextureType::Texture2D:
    case TextureType::Texture2DArray:
    case TextureType::Texture2DNoMipmap:
    case TextureType::Texture3D:
    case TextureType::TextureCubeArray:
    case TextureType::TextureCubemap:
        return true;
    case TextureType::Texture1D:
    case TextureType::Texture1DArray:
    case TextureType::Texture1DBuffer:
        return false;
    }
    return false;
}

[[nodiscard]] constexpr bool HasTwoDimsPerLayer(ImageType type) {
    switch (type) {
    case ImageType::e2D:
    case ImageType::e3D:
    case ImageType::Linear:
        return true;
    case ImageType::e1D:
    case ImageType::Buffer:
        return false;
    }
    UNREACHABLE_MSG("Invalid image type={}", static_cast<int>(type));
}

[[nodiscard]] constexpr std::pair<int, int> Samples(int num_samples) {
    switch (num_samples) {
    case 1:
        return {1, 1};
    case 2:
        return {2, 1};
    case 4:
        return {2, 2};
    case 8:
        return {4, 2};
    case 16:
        return {4, 4};
    }
    UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
    return {1, 1};
}
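
// Illustrative note, not from the original file: these per-axis factors are what
// MakeLevelInfo below multiplies the level-0 extent by, so a 1280x720 target with
// 4x MSAA ({2, 2}) is laid out as a 2560x1440 sample grid, and 8x ({4, 2}) as
// 5120x1440:
//
//   static_assert(Samples(4) == std::pair{2, 2});
//   static_assert(Samples(8) == std::pair{4, 2});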
222
223[[nodiscard]] constexpr Extent2D DefaultBlockSize(PixelFormat format) {
224 return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
225}
226
227[[nodiscard]] constexpr Extent3D NumLevelBlocks(const LevelInfo& info, u32 level) {
228 return Extent3D{
229 .width = AdjustSize(info.size.width, level, info.tile_size.width) << info.bpp_log2,
230 .height = AdjustSize(info.size.height, level, info.tile_size.height),
231 .depth = AdjustMipSize(info.size.depth, level),
232 };
233}
234
235[[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) {
236 const Extent3D blocks = NumLevelBlocks(info, level);
237 return Extent3D{
238 .width = AdjustTileSize(info.block.width, GOB_SIZE_X, blocks.width),
239 .height = AdjustTileSize(info.block.height, GOB_SIZE_Y, blocks.height),
240 .depth = AdjustTileSize(info.block.depth, GOB_SIZE_Z, blocks.depth),
241 };
242}
243
244[[nodiscard]] constexpr Extent2D GobSize(u32 bpp_log2, u32 block_height, u32 tile_width_spacing) {
245 return Extent2D{
246 .width = GOB_SIZE_X_SHIFT - bpp_log2 + tile_width_spacing,
247 .height = GOB_SIZE_Y_SHIFT + block_height,
248 };
249}
250
251[[nodiscard]] constexpr bool IsSmallerThanGobSize(Extent3D num_tiles, Extent2D gob,
252 u32 block_depth) {
253 return num_tiles.width <= (1U << gob.width) || num_tiles.height <= (1U << gob.height) ||
254 num_tiles.depth < (1U << block_depth);
255}
256
257[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, Extent2D gob,
258 u32 bpp_log2) {
259 if (IsSmallerThanGobSize(num_tiles, gob, block.depth)) {
260 return GOB_SIZE_X_SHIFT - bpp_log2;
261 } else {
262 return gob.width;
263 }
264}
265
266[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, u32 bpp_log2,
267 u32 tile_width_spacing) {
268 const Extent2D gob = GobSize(bpp_log2, block.height, tile_width_spacing);
269 return StrideAlignment(num_tiles, block, gob, bpp_log2);
270}
271
272[[nodiscard]] constexpr Extent2D NumGobs(const LevelInfo& info, u32 level) {
273 const Extent3D blocks = NumLevelBlocks(info, level);
274 const Extent2D gobs{
275 .width = Common::DivCeilLog2(blocks.width, GOB_SIZE_X_SHIFT),
276 .height = Common::DivCeilLog2(blocks.height, GOB_SIZE_Y_SHIFT),
277 };
278 const Extent2D gob = GobSize(info.bpp_log2, info.block.height, info.tile_width_spacing);
279 const bool is_small = IsSmallerThanGobSize(blocks, gob, info.block.depth);
280 const u32 alignment = is_small ? 0 : info.tile_width_spacing;
281 return Extent2D{
282 .width = Common::AlignBits(gobs.width, alignment),
283 .height = gobs.height,
284 };
285}
286
287[[nodiscard]] constexpr Extent3D LevelTiles(const LevelInfo& info, u32 level) {
288 const Extent3D blocks = NumLevelBlocks(info, level);
289 const Extent3D tile_shift = TileShift(info, level);
290 const Extent2D gobs = NumGobs(info, level);
291 return Extent3D{
292 .width = Common::DivCeilLog2(gobs.width, tile_shift.width),
293 .height = Common::DivCeilLog2(gobs.height, tile_shift.height),
294 .depth = Common::DivCeilLog2(blocks.depth, tile_shift.depth),
295 };
296}
297
298[[nodiscard]] constexpr u32 CalculateLevelSize(const LevelInfo& info, u32 level) {
299 const Extent3D tile_shift = TileShift(info, level);
300 const Extent3D tiles = LevelTiles(info, level);
301 const u32 num_tiles = tiles.width * tiles.height * tiles.depth;
302 const u32 shift = GOB_SIZE_SHIFT + tile_shift.width + tile_shift.height + tile_shift.depth;
303 return num_tiles << shift;
304}
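// Worked example, matching the A8B8G8R8_UNORM static_assert checks at the bottom of
// this file: a 1024x1024 RGBA8 image with block = {0, 4, 0} has bpp_log2 = 2, so
// level 0 is 4096 bytes wide. That is 64 GOBs across by 128 GOBs down, grouped into
// 64x8x1 blocks of 2^(9 + 0 + 4 + 0) bytes: 512 blocks * 8192 bytes = 0x400000.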
305
306[[nodiscard]] constexpr std::array<u32, MAX_MIP_LEVELS> CalculateLevelSizes(const LevelInfo& info,
307 u32 num_levels) {
308 ASSERT(num_levels <= MAX_MIP_LEVELS);
309 std::array<u32, MAX_MIP_LEVELS> sizes{};
310 for (u32 level = 0; level < num_levels; ++level) {
311 sizes[level] = CalculateLevelSize(info, level);
312 }
313 return sizes;
314}
315
316[[nodiscard]] constexpr LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block,
317 u32 num_samples, u32 tile_width_spacing) {
318 const auto [samples_x, samples_y] = Samples(num_samples);
319 const u32 bytes_per_block = BytesPerBlock(format);
320 return {
321 .size =
322 {
323 .width = size.width * samples_x,
324 .height = size.height * samples_y,
325 .depth = size.depth,
326 },
327 .block = block,
328 .tile_size = DefaultBlockSize(format),
329 .bpp_log2 = BytesPerBlockLog2(bytes_per_block),
330 .tile_width_spacing = tile_width_spacing,
331 };
332}
333
334[[nodiscard]] constexpr LevelInfo MakeLevelInfo(const ImageInfo& info) {
335 return MakeLevelInfo(info.format, info.size, info.block, info.num_samples,
336 info.tile_width_spacing);
337}
338
339[[nodiscard]] constexpr u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block,
340 u32 num_samples, u32 tile_width_spacing,
341 u32 level) {
342 const LevelInfo info = MakeLevelInfo(format, size, block, num_samples, tile_width_spacing);
343 u32 offset = 0;
344 for (u32 current_level = 0; current_level < level; ++current_level) {
345 offset += CalculateLevelSize(info, current_level);
346 }
347 return offset;
348}
349
350[[nodiscard]] constexpr u32 AlignLayerSize(u32 size_bytes, Extent3D size, Extent3D block,
351 u32 tile_size_y, u32 tile_width_spacing) {
352 // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L134
353 if (tile_width_spacing > 0) {
354 const u32 alignment_log2 = GOB_SIZE_SHIFT + tile_width_spacing + block.height + block.depth;
355 return Common::AlignBits(size_bytes, alignment_log2);
356 }
357 const u32 aligned_height = Common::AlignUp(size.height, tile_size_y);
358 while (block.height != 0 && aligned_height <= (1U << (block.height - 1)) * GOB_SIZE_Y) {
359 --block.height;
360 }
361 while (block.depth != 0 && size.depth <= (1U << (block.depth - 1))) {
362 --block.depth;
363 }
364 const u32 block_shift = GOB_SIZE_SHIFT + block.height + block.depth;
365 const u32 num_blocks = size_bytes >> block_shift;
366 if (size_bytes != num_blocks << block_shift) {
367 return (num_blocks + 1) << block_shift;
368 }
369 return size_bytes;
370}
371
372[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapEqualAddress(const ImageInfo& new_info,
373 const ImageBase& overlap,
374 bool strict_size) {
375 const ImageInfo& info = overlap.info;
376 if (!IsBlockLinearSizeCompatible(new_info, info, 0, 0, strict_size)) {
377 return std::nullopt;
378 }
379 if (new_info.block != info.block) {
380 return std::nullopt;
381 }
382 const SubresourceExtent resources = new_info.resources;
383 return SubresourceExtent{
384 .levels = std::max(resources.levels, info.resources.levels),
385 .layers = std::max(resources.layers, info.resources.layers),
386 };
387}
388
389[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D(
390 const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
391 const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info);
392 const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr);
393 const auto it = std::ranges::find(slice_offsets, diff);
394 if (it == slice_offsets.end()) {
395 return std::nullopt;
396 }
397 const std::vector subresources = CalculateSliceSubresources(new_info);
398 const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)];
399 const ImageInfo& info = overlap.info;
400 if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
401 return std::nullopt;
402 }
403 const u32 mip_depth = std::max(1U, new_info.size.depth >> base.level);
404 if (mip_depth < info.size.depth + base.layer) {
405 return std::nullopt;
406 }
407 if (MipBlockSize(new_info, base.level) != info.block) {
408 return std::nullopt;
409 }
410 return SubresourceExtent{
411 .levels = std::max(new_info.resources.levels, info.resources.levels + base.level),
412 .layers = 1,
413 };
414}
415
416[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress2D(
417 const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
418 const u32 layer_stride = new_info.layer_stride;
419 const s32 new_size = layer_stride * new_info.resources.layers;
420 const s32 diff = static_cast<s32>(overlap.gpu_addr - gpu_addr);
421 if (diff > new_size) {
422 return std::nullopt;
423 }
424 const s32 base_layer = diff / layer_stride;
425 const s32 mip_offset = diff % layer_stride;
426 const std::array offsets = CalculateMipLevelOffsets(new_info);
427 const auto end = offsets.begin() + new_info.resources.levels;
428 const auto it = std::find(offsets.begin(), end, mip_offset);
429 if (it == end) {
430 // Mipmap is not aligned to any valid size
431 return std::nullopt;
432 }
433 const SubresourceBase base{
434 .level = static_cast<s32>(std::distance(offsets.begin(), it)),
435 .layer = base_layer,
436 };
437 const ImageInfo& info = overlap.info;
438 if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
439 return std::nullopt;
440 }
441 if (MipBlockSize(new_info, base.level) != info.block) {
442 return std::nullopt;
443 }
444 return SubresourceExtent{
445 .levels = std::max(new_info.resources.levels, info.resources.levels + base.level),
446 .layers = std::max(new_info.resources.layers, info.resources.layers + base.layer),
447 };
448}
449
450[[nodiscard]] std::optional<OverlapResult> ResolveOverlapRightAddress(const ImageInfo& new_info,
451 GPUVAddr gpu_addr,
452 VAddr cpu_addr,
453 const ImageBase& overlap,
454 bool strict_size) {
455 std::optional<SubresourceExtent> resources;
456 if (new_info.type != ImageType::e3D) {
457 resources = ResolveOverlapRightAddress2D(new_info, gpu_addr, overlap, strict_size);
458 } else {
459 resources = ResolveOverlapRightAddress3D(new_info, gpu_addr, overlap, strict_size);
460 }
461 if (!resources) {
462 return std::nullopt;
463 }
464 return OverlapResult{
465 .gpu_addr = gpu_addr,
466 .cpu_addr = cpu_addr,
467 .resources = *resources,
468 };
469}
470
471[[nodiscard]] std::optional<OverlapResult> ResolveOverlapLeftAddress(const ImageInfo& new_info,
472 GPUVAddr gpu_addr,
473 VAddr cpu_addr,
474 const ImageBase& overlap,
475 bool strict_size) {
476 const std::optional<SubresourceBase> base = overlap.TryFindBase(gpu_addr);
477 if (!base) {
478 return std::nullopt;
479 }
480 const ImageInfo& info = overlap.info;
481 if (!IsBlockLinearSizeCompatible(new_info, info, base->level, 0, strict_size)) {
482 return std::nullopt;
483 }
484 if (new_info.block != MipBlockSize(info, base->level)) {
485 return std::nullopt;
486 }
487 const SubresourceExtent resources = new_info.resources;
488 s32 layers = 1;
489 if (info.type != ImageType::e3D) {
490 layers = std::max(resources.layers, info.resources.layers + base->layer);
491 }
492 return OverlapResult{
493 .gpu_addr = overlap.gpu_addr,
494 .cpu_addr = overlap.cpu_addr,
495 .resources =
496 {
497 .levels = std::max(resources.levels + base->level, info.resources.levels),
498 .layers = layers,
499 },
500 };
501}
502
503[[nodiscard]] Extent2D PitchLinearAlignedSize(const ImageInfo& info) {
504 // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L212
505 static constexpr u32 STRIDE_ALIGNMENT = 32;
506 ASSERT(info.type == ImageType::Linear);
507 const Extent2D num_tiles{
508 .width = Common::DivCeil(info.size.width, DefaultBlockWidth(info.format)),
509 .height = Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)),
510 };
511 const u32 width_alignment = STRIDE_ALIGNMENT / BytesPerBlock(info.format);
512 return Extent2D{
513 .width = Common::AlignUp(num_tiles.width, width_alignment),
514 .height = num_tiles.height,
515 };
516}
517
518[[nodiscard]] Extent3D BlockLinearAlignedSize(const ImageInfo& info, u32 level) {
519 // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L176
520 ASSERT(info.type != ImageType::Linear);
521 const Extent3D size = AdjustMipSize(info.size, level);
522 const Extent3D num_tiles{
523 .width = Common::DivCeil(size.width, DefaultBlockWidth(info.format)),
524 .height = Common::DivCeil(size.height, DefaultBlockHeight(info.format)),
525 .depth = size.depth,
526 };
527 const u32 bpp_log2 = BytesPerBlockLog2(info.format);
528 const u32 alignment = StrideAlignment(num_tiles, info.block, bpp_log2, info.tile_width_spacing);
529 const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0);
530 return Extent3D{
531 .width = Common::AlignBits(num_tiles.width, alignment),
532 .height = Common::AlignBits(num_tiles.height, GOB_SIZE_Y_SHIFT + mip_block.height),
533 .depth = Common::AlignBits(num_tiles.depth, GOB_SIZE_Z_SHIFT + mip_block.depth),
534 };
535}
536
537[[nodiscard]] constexpr u32 NumBlocksPerLayer(const ImageInfo& info, Extent2D tile_size) noexcept {
538 u32 num_blocks = 0;
539 for (s32 level = 0; level < info.resources.levels; ++level) {
540 const Extent3D mip_size = AdjustMipSize(info.size, level);
541 num_blocks += NumBlocks(mip_size, tile_size);
542 }
543 return num_blocks;
544}
545
546[[nodiscard]] u32 NumSlices(const ImageInfo& info) noexcept {
547 ASSERT(info.type == ImageType::e3D);
548 u32 num_slices = 0;
549 for (s32 level = 0; level < info.resources.levels; ++level) {
550 num_slices += AdjustMipSize(info.size.depth, level);
551 }
552 return num_slices;
553}
554
555void SwizzlePitchLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
556 const ImageInfo& info, const BufferImageCopy& copy,
557 std::span<const u8> memory) {
558 ASSERT(copy.image_offset.z == 0);
559 ASSERT(copy.image_extent.depth == 1);
560 ASSERT(copy.image_subresource.base_level == 0);
561 ASSERT(copy.image_subresource.base_layer == 0);
562 ASSERT(copy.image_subresource.num_layers == 1);
563
564 const u32 bytes_per_block = BytesPerBlock(info.format);
565 const u32 row_length = copy.image_extent.width * bytes_per_block;
566 const u32 guest_offset_x = copy.image_offset.x * bytes_per_block;
567
568 for (u32 line = 0; line < copy.image_extent.height; ++line) {
569 const u32 host_offset_y = line * info.pitch;
570 const u32 guest_offset_y = (copy.image_offset.y + line) * info.pitch;
571 const u32 guest_offset = guest_offset_x + guest_offset_y;
572 gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, memory.data() + host_offset_y,
573 row_length);
574 }
575}
576
577void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
578 const ImageInfo& info, const BufferImageCopy& copy,
579 std::span<const u8> input) {
580 const Extent3D size = info.size;
581 const LevelInfo level_info = MakeLevelInfo(info);
582 const Extent2D tile_size = DefaultBlockSize(info.format);
583 const u32 bytes_per_block = BytesPerBlock(info.format);
584
585 const s32 level = copy.image_subresource.base_level;
586 const Extent3D level_size = AdjustMipSize(size, level);
587 const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
588 const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block;
589
590 UNIMPLEMENTED_IF(info.tile_width_spacing > 0);
591
592 UNIMPLEMENTED_IF(copy.image_offset.x != 0);
593 UNIMPLEMENTED_IF(copy.image_offset.y != 0);
594 UNIMPLEMENTED_IF(copy.image_offset.z != 0);
595 UNIMPLEMENTED_IF(copy.image_extent != level_size);
596
597 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
598 const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
599
600 size_t host_offset = copy.buffer_offset;
601
602 const u32 num_levels = info.resources.levels;
603 const std::array sizes = CalculateLevelSizes(level_info, num_levels);
604 size_t guest_offset = std::reduce(sizes.begin(), sizes.begin() + level, 0);
605 const size_t layer_stride =
606 AlignLayerSize(std::reduce(sizes.begin(), sizes.begin() + num_levels, 0), size,
607 level_info.block, tile_size.height, info.tile_width_spacing);
608 const size_t subresource_size = sizes[level];
609
610 const auto dst_data = std::make_unique<u8[]>(subresource_size);
611 const std::span<u8> dst(dst_data.get(), subresource_size);
612
613 for (s32 layer = 0; layer < info.resources.layers; ++layer) {
614 const std::span<const u8> src = input.subspan(host_offset);
615 SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
616 num_tiles.depth, block.height, block.depth);
617
618 gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
619
620 host_offset += host_bytes_per_layer;
621 guest_offset += layer_stride;
622 }
623 ASSERT(host_offset - copy.buffer_offset == copy.buffer_size);
624}
625
626} // Anonymous namespace
627
628u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept {
629 if (info.type == ImageType::Buffer) {
630 return info.size.width * BytesPerBlock(info.format);
631 }
632 if (info.type == ImageType::Linear) {
633 return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format));
634 }
635 if (info.resources.layers > 1) {
636 ASSERT(info.layer_stride != 0);
637 return info.layer_stride * info.resources.layers;
638 } else {
639 return CalculateLayerSize(info);
640 }
641}
642
643u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept {
644 if (info.type == ImageType::Buffer) {
645 return info.size.width * BytesPerBlock(info.format);
646 }
647 if (info.num_samples > 1) {
648 // Multisample images can't be uploaded or downloaded to the host
649 return 0;
650 }
651 if (info.type == ImageType::Linear) {
652 return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format));
653 }
654 const Extent2D tile_size = DefaultBlockSize(info.format);
655 return NumBlocksPerLayer(info, tile_size) * info.resources.layers * BytesPerBlock(info.format);
656}
657
658u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept {
659 if (info.type == ImageType::Buffer) {
660 return info.size.width * BytesPerBlock(info.format);
661 }
662 static constexpr Extent2D TILE_SIZE{1, 1};
663 return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK;
664}
665
666u32 CalculateLayerStride(const ImageInfo& info) noexcept {
667 ASSERT(info.type != ImageType::Linear);
668 const u32 layer_size = CalculateLayerSize(info);
669 const Extent3D size = info.size;
670 const Extent3D block = info.block;
671 const u32 tile_size_y = DefaultBlockHeight(info.format);
672 return AlignLayerSize(layer_size, size, block, tile_size_y, info.tile_width_spacing);
673}
674
675u32 CalculateLayerSize(const ImageInfo& info) noexcept {
676 ASSERT(info.type != ImageType::Linear);
677 return CalculateLevelOffset(info.format, info.size, info.block, info.num_samples,
678 info.tile_width_spacing, info.resources.levels);
679}
680
681std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets(const ImageInfo& info) noexcept {
682 ASSERT(info.resources.levels <= MAX_MIP_LEVELS);
683 const LevelInfo level_info = MakeLevelInfo(info);
684 std::array<u32, MAX_MIP_LEVELS> offsets{};
685 u32 offset = 0;
686 for (s32 level = 0; level < info.resources.levels; ++level) {
687 offsets[level] = offset;
688 offset += CalculateLevelSize(level_info, level);
689 }
690 return offsets;
691}
692
693std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
694 ASSERT(info.type == ImageType::e3D);
695 std::vector<u32> offsets;
696 offsets.reserve(NumSlices(info));
697
698 const LevelInfo level_info = MakeLevelInfo(info);
699 u32 mip_offset = 0;
700 for (s32 level = 0; level < info.resources.levels; ++level) {
701 const Extent3D tile_shift = TileShift(level_info, level);
702 const Extent3D tiles = LevelTiles(level_info, level);
703 const u32 gob_size_shift = tile_shift.height + GOB_SIZE_SHIFT;
704 const u32 slice_size = (tiles.width * tiles.height) << gob_size_shift;
705 const u32 z_mask = (1U << tile_shift.depth) - 1;
706 const u32 depth = AdjustMipSize(info.size.depth, level);
707 for (u32 slice = 0; slice < depth; ++slice) {
708 const u32 z_low = slice & z_mask;
709 const u32 z_high = slice & ~z_mask;
710 offsets.push_back(mip_offset + (z_low << gob_size_shift) + (z_high * slice_size));
711 }
712 mip_offset += CalculateLevelSize(level_info, level);
713 }
714 return offsets;
715}
716
717std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) {
718 ASSERT(info.type == ImageType::e3D);
719 std::vector<SubresourceBase> subresources;
720 subresources.reserve(NumSlices(info));
721 for (s32 level = 0; level < info.resources.levels; ++level) {
722 const s32 depth = AdjustMipSize(info.size.depth, level);
723 for (s32 slice = 0; slice < depth; ++slice) {
724 subresources.emplace_back(SubresourceBase{
725 .level = level,
726 .layer = slice,
727 });
728 }
729 }
730 return subresources;
731}
732
733u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level) {
734 const Extent2D tile_size = DefaultBlockSize(info.format);
735 const Extent3D level_size = AdjustMipSize(info.size, level);
736 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
737 const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level);
738 const u32 bpp_log2 = BytesPerBlockLog2(info.format);
739 return StrideAlignment(num_tiles, block, bpp_log2, info.tile_width_spacing);
740}
741
742PixelFormat PixelFormatFromTIC(const TICEntry& config) noexcept {
743 return PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type,
744 config.a_type, config.srgb_conversion);
745}
746
747ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept {
748 switch (info.type) {
749 case ImageType::e2D:
750 return info.resources.layers > 1 ? ImageViewType::e2DArray : ImageViewType::e2D;
751 case ImageType::e3D:
752 return ImageViewType::e2DArray;
753 case ImageType::Linear:
754 return ImageViewType::e2D;
755 default:
756 UNIMPLEMENTED_MSG("Unimplemented image type={}", static_cast<int>(info.type));
757 return ImageViewType{};
758 }
759}
760
761std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src,
762 SubresourceBase base) {
763 ASSERT(dst.resources.levels >= src.resources.levels);
764 ASSERT(dst.num_samples == src.num_samples);
765
766 const bool is_dst_3d = dst.type == ImageType::e3D;
767 if (is_dst_3d) {
768 ASSERT(src.type == ImageType::e3D);
769 ASSERT(src.resources.levels == 1);
770 }
771
772 std::vector<ImageCopy> copies;
773 copies.reserve(src.resources.levels);
774 for (s32 level = 0; level < src.resources.levels; ++level) {
775 ImageCopy& copy = copies.emplace_back();
776 copy.src_subresource = SubresourceLayers{
777 .base_level = level,
778 .base_layer = 0,
779 .num_layers = src.resources.layers,
780 };
781 copy.dst_subresource = SubresourceLayers{
782 .base_level = base.level + level,
783 .base_layer = is_dst_3d ? 0 : base.layer,
784 .num_layers = is_dst_3d ? 1 : src.resources.layers,
785 };
786 copy.src_offset = Offset3D{
787 .x = 0,
788 .y = 0,
789 .z = 0,
790 };
791 copy.dst_offset = Offset3D{
792 .x = 0,
793 .y = 0,
794 .z = is_dst_3d ? base.layer : 0,
795 };
796 const Extent3D mip_size = AdjustMipSize(dst.size, base.level + level);
797 copy.extent = AdjustSamplesSize(mip_size, dst.num_samples);
798 if (is_dst_3d) {
799 copy.extent.depth = src.size.depth;
800 }
801 }
802 return copies;
803}
804
805bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
806 if (config.Address() == 0) {
807 return false;
808 }
809 if (config.Address() > (u64(1) << 48)) {
810 return false;
811 }
812 return gpu_memory.GpuToCpuAddress(config.Address()).has_value();
813}
814
815std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
816 const ImageInfo& info, std::span<u8> output) {
817 const size_t guest_size_bytes = CalculateGuestSizeInBytes(info);
818 const u32 bpp_log2 = BytesPerBlockLog2(info.format);
819 const Extent3D size = info.size;
820
821 if (info.type == ImageType::Linear) {
822 gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes);
823
824 ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch);
825 return {{
826 .buffer_offset = 0,
827 .buffer_size = guest_size_bytes,
828 .buffer_row_length = info.pitch >> bpp_log2,
829 .buffer_image_height = size.height,
830 .image_subresource =
831 {
832 .base_level = 0,
833 .base_layer = 0,
834 .num_layers = 1,
835 },
836 .image_offset = {0, 0, 0},
837 .image_extent = size,
838 }};
839 }
840 const auto input_data = std::make_unique<u8[]>(guest_size_bytes);
841 gpu_memory.ReadBlockUnsafe(gpu_addr, input_data.get(), guest_size_bytes);
842 const std::span<const u8> input(input_data.get(), guest_size_bytes);
843
844 const LevelInfo level_info = MakeLevelInfo(info);
845 const s32 num_layers = info.resources.layers;
846 const s32 num_levels = info.resources.levels;
847 const Extent2D tile_size = DefaultBlockSize(info.format);
848 const std::array level_sizes = CalculateLevelSizes(level_info, num_levels);
849 const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing);
850 const u32 layer_size = std::reduce(level_sizes.begin(), level_sizes.begin() + num_levels, 0);
851 const u32 layer_stride = AlignLayerSize(layer_size, size, level_info.block, tile_size.height,
852 info.tile_width_spacing);
853 size_t guest_offset = 0;
854 u32 host_offset = 0;
855 std::vector<BufferImageCopy> copies(num_levels);
856
857 for (s32 level = 0; level < num_levels; ++level) {
858 const Extent3D level_size = AdjustMipSize(size, level);
859 const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
860 const u32 host_bytes_per_layer = num_blocks_per_layer << bpp_log2;
861 copies[level] = BufferImageCopy{
862 .buffer_offset = host_offset,
863 .buffer_size = static_cast<size_t>(host_bytes_per_layer) * num_layers,
864 .buffer_row_length = Common::AlignUp(level_size.width, tile_size.width),
865 .buffer_image_height = Common::AlignUp(level_size.height, tile_size.height),
866 .image_subresource =
867 {
868 .base_level = level,
869 .base_layer = 0,
870 .num_layers = info.resources.layers,
871 },
872 .image_offset = {0, 0, 0},
873 .image_extent = level_size,
874 };
875 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
876 const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
877 const u32 stride_alignment = StrideAlignment(num_tiles, info.block, gob, bpp_log2);
878 size_t guest_layer_offset = 0;
879
880 for (s32 layer = 0; layer < info.resources.layers; ++layer) {
881 const std::span<u8> dst = output.subspan(host_offset);
882 const std::span<const u8> src = input.subspan(guest_offset + guest_layer_offset);
883 UnswizzleTexture(dst, src, 1U << bpp_log2, num_tiles.width, num_tiles.height,
884 num_tiles.depth, block.height, block.depth, stride_alignment);
885 guest_layer_offset += layer_stride;
886 host_offset += host_bytes_per_layer;
887 }
888 guest_offset += level_sizes[level];
889 }
890 return copies;
891}
892
893BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
894 const ImageBase& image, std::span<u8> output) {
895 gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes);
896 return BufferCopy{
897 .src_offset = 0,
898 .dst_offset = 0,
899 .size = image.guest_size_bytes,
900 };
901}
902
903void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
904 std::span<BufferImageCopy> copies) {
905 u32 output_offset = 0;
906
907 const Extent2D tile_size = DefaultBlockSize(info.format);
908 for (BufferImageCopy& copy : copies) {
909 const u32 level = copy.image_subresource.base_level;
910 const Extent3D mip_size = AdjustMipSize(info.size, level);
911 ASSERT(copy.image_offset == Offset3D{});
912 ASSERT(copy.image_subresource.base_layer == 0);
913 ASSERT(copy.image_extent == mip_size);
914 ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width));
915 ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height));
916
917 if (IsPixelFormatASTC(info.format)) {
918 ASSERT(copy.image_extent.depth == 1);
919 Tegra::Texture::ASTC::Decompress(input.subspan(copy.buffer_offset),
920 copy.image_extent.width, copy.image_extent.height,
921 copy.image_subresource.num_layers, tile_size.width,
922 tile_size.height, output.subspan(output_offset));
923 } else {
924 DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent,
925 output.subspan(output_offset));
926 }
927 copy.buffer_offset = output_offset;
928 copy.buffer_row_length = mip_size.width;
929 copy.buffer_image_height = mip_size.height;
930
931 output_offset += copy.image_extent.width * copy.image_extent.height *
932 copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
933 }
934}
935
936std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
937 const Extent3D size = info.size;
938 const u32 bytes_per_block = BytesPerBlock(info.format);
939 if (info.type == ImageType::Linear) {
940 ASSERT(info.pitch % bytes_per_block == 0);
941 return {{
942 .buffer_offset = 0,
943 .buffer_size = static_cast<size_t>(info.pitch) * size.height,
944 .buffer_row_length = info.pitch / bytes_per_block,
945 .buffer_image_height = size.height,
946 .image_subresource =
947 {
948 .base_level = 0,
949 .base_layer = 0,
950 .num_layers = 1,
951 },
952 .image_offset = {0, 0, 0},
953 .image_extent = size,
954 }};
955 }
956 UNIMPLEMENTED_IF(info.tile_width_spacing > 0);
957
958 const s32 num_layers = info.resources.layers;
959 const s32 num_levels = info.resources.levels;
960 const Extent2D tile_size = DefaultBlockSize(info.format);
961
962 u32 host_offset = 0;
963
964 std::vector<BufferImageCopy> copies(num_levels);
965 for (s32 level = 0; level < num_levels; ++level) {
966 const Extent3D level_size = AdjustMipSize(size, level);
967 const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
968 const u32 host_bytes_per_level = num_blocks_per_layer * bytes_per_block * num_layers;
969 copies[level] = BufferImageCopy{
970 .buffer_offset = host_offset,
971 .buffer_size = host_bytes_per_level,
972 .buffer_row_length = level_size.width,
973 .buffer_image_height = level_size.height,
974 .image_subresource =
975 {
976 .base_level = level,
977 .base_layer = 0,
978 .num_layers = info.resources.layers,
979 },
980 .image_offset = {0, 0, 0},
981 .image_extent = level_size,
982 };
983 host_offset += host_bytes_per_level;
984 }
985 return copies;
986}
987
988Extent3D MipSize(Extent3D size, u32 level) {
989 return AdjustMipSize(size, level);
990}
991
992Extent3D MipBlockSize(const ImageInfo& info, u32 level) {
993 const LevelInfo level_info = MakeLevelInfo(info);
994 const Extent2D tile_size = DefaultBlockSize(info.format);
995 const Extent3D level_size = AdjustMipSize(info.size, level);
996 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
997 return AdjustMipBlockSize(num_tiles, level_info.block, level);
998}
999
1000std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
1001 const Extent2D tile_size = DefaultBlockSize(info.format);
1002 if (info.type == ImageType::Linear) {
1003 return std::vector{SwizzleParameters{
1004 .num_tiles = AdjustTileSize(info.size, tile_size),
1005 .block = {},
1006 .buffer_offset = 0,
1007 .level = 0,
1008 }};
1009 }
1010 const LevelInfo level_info = MakeLevelInfo(info);
1011 const Extent3D size = info.size;
1012 const s32 num_levels = info.resources.levels;
1013
1014 u32 guest_offset = 0;
1015 std::vector<SwizzleParameters> params(num_levels);
1016 for (s32 level = 0; level < num_levels; ++level) {
1017 const Extent3D level_size = AdjustMipSize(size, level);
1018 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
1019 const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
1020 params[level] = SwizzleParameters{
1021 .num_tiles = num_tiles,
1022 .block = block,
1023 .buffer_offset = guest_offset,
1024 .level = level,
1025 };
1026 guest_offset += CalculateLevelSize(level_info, level);
1027 }
1028 return params;
1029}
1030
1031void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
1032 std::span<const BufferImageCopy> copies, std::span<const u8> memory) {
1033 const bool is_pitch_linear = info.type == ImageType::Linear;
1034 for (const BufferImageCopy& copy : copies) {
1035 if (is_pitch_linear) {
1036 SwizzlePitchLinearImage(gpu_memory, gpu_addr, info, copy, memory);
1037 } else {
1038 SwizzleBlockLinearImage(gpu_memory, gpu_addr, info, copy, memory);
1039 }
1040 }
1041}
1042
1043bool IsBlockLinearSizeCompatible(const ImageInfo& lhs, const ImageInfo& rhs, u32 lhs_level,
1044 u32 rhs_level, bool strict_size) noexcept {
1045 ASSERT(lhs.type != ImageType::Linear);
1046 ASSERT(rhs.type != ImageType::Linear);
1047 if (strict_size) {
1048 const Extent3D lhs_size = AdjustMipSize(lhs.size, lhs_level);
1049 const Extent3D rhs_size = AdjustMipSize(rhs.size, rhs_level);
1050 return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height;
1051 } else {
1052 const Extent3D lhs_size = BlockLinearAlignedSize(lhs, lhs_level);
1053 const Extent3D rhs_size = BlockLinearAlignedSize(rhs, rhs_level);
1054 return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height;
1055 }
1056}
1057
1058bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool strict_size) noexcept {
1059 ASSERT(lhs.type == ImageType::Linear);
1060 ASSERT(rhs.type == ImageType::Linear);
1061 if (strict_size) {
1062 return lhs.size.width == rhs.size.width && lhs.size.height == rhs.size.height;
1063 } else {
1064 const Extent2D lhs_size = PitchLinearAlignedSize(lhs);
1065 const Extent2D rhs_size = PitchLinearAlignedSize(rhs);
1066 return lhs_size == rhs_size;
1067 }
1068}
1069
1070std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr,
1071 VAddr cpu_addr, const ImageBase& overlap,
1072 bool strict_size) {
1073 ASSERT(new_info.type != ImageType::Linear);
1074 ASSERT(overlap.info.type != ImageType::Linear);
1075 if (!IsLayerStrideCompatible(new_info, overlap.info)) {
1076 return std::nullopt;
1077 }
1078 if (!IsViewCompatible(overlap.info.format, new_info.format)) {
1079 return std::nullopt;
1080 }
1081 if (gpu_addr == overlap.gpu_addr) {
1082 const std::optional solution = ResolveOverlapEqualAddress(new_info, overlap, strict_size);
1083 if (!solution) {
1084 return std::nullopt;
1085 }
1086 return OverlapResult{
1087 .gpu_addr = gpu_addr,
1088 .cpu_addr = cpu_addr,
1089 .resources = *solution,
1090 };
1091 }
1092 if (overlap.gpu_addr > gpu_addr) {
1093 return ResolveOverlapRightAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size);
1094 }
1095 // if overlap.gpu_addr < gpu_addr
1096 return ResolveOverlapLeftAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size);
1097}
1098
1099bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs) {
1100 // If either of the layer strides is zero, we can assume they are compatible
1101 // These images generally come from render targets
1102 if (lhs.layer_stride == 0) {
1103 return true;
1104 }
1105 if (rhs.layer_stride == 0) {
1106 return true;
1107 }
1108 // It's definitely compatible if the layer stride matches
1109 if (lhs.layer_stride == rhs.layer_stride) {
1110 return true;
1111 }
1112 // We also have to compare the unaligned layer strides, since an image
1113 // without layers may leave its stride unaligned
1114 if (lhs.maybe_unaligned_layer_stride == rhs.maybe_unaligned_layer_stride) {
1115 return true;
1116 }
1117 return false;
1118}
1119
1120std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image,
1121 GPUVAddr candidate_addr, RelaxedOptions options) {
1122 const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr);
1123 if (!base) {
1124 return std::nullopt;
1125 }
1126 const ImageInfo& existing = image.info;
1127 if (False(options & RelaxedOptions::Format)) {
1128 if (!IsViewCompatible(existing.format, candidate.format)) {
1129 return std::nullopt;
1130 }
1131 }
1132 if (!IsLayerStrideCompatible(existing, candidate)) {
1133 return std::nullopt;
1134 }
1135 if (existing.type != candidate.type) {
1136 return std::nullopt;
1137 }
1138 if (False(options & RelaxedOptions::Samples)) {
1139 if (existing.num_samples != candidate.num_samples) {
1140 return std::nullopt;
1141 }
1142 }
1143 if (existing.resources.levels < candidate.resources.levels + base->level) {
1144 return std::nullopt;
1145 }
1146 if (existing.type == ImageType::e3D) {
1147 const u32 mip_depth = std::max(1U, existing.size.depth >> base->level);
1148 if (mip_depth < candidate.size.depth + base->layer) {
1149 return std::nullopt;
1150 }
1151 } else {
1152 if (existing.resources.layers < candidate.resources.layers + base->layer) {
1153 return std::nullopt;
1154 }
1155 }
1156 const bool strict_size = False(options & RelaxedOptions::Size);
1157 if (!IsBlockLinearSizeCompatible(existing, candidate, base->level, 0, strict_size)) {
1158 return std::nullopt;
1159 }
1160 // TODO: compare block sizes
1161 return base;
1162}
1163
1164bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr,
1165 RelaxedOptions options) {
1166 return FindSubresource(candidate, image, candidate_addr, options).has_value();
1167}
1168
1169void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
1170 const ImageBase* src) {
1171 if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
1172 src_info.format = src->info.format;
1173 }
1174 if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
1175 dst_info.format = dst->info.format;
1176 }
1177 if (!dst && src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
1178 dst_info.format = src->info.format;
1179 }
1180 if (!src && dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
1181 src_info.format = dst->info.format;
1182 }
1183}
1184
1185u32 MapSizeBytes(const ImageBase& image) {
1186 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
1187 return image.guest_size_bytes;
1188 } else if (True(image.flags & ImageFlagBits::Converted)) {
1189 return image.converted_size_bytes;
1190 } else {
1191 return image.unswizzled_size_bytes;
1192 }
1193}
1194
1195using P = PixelFormat;
1196
1197static_assert(CalculateLevelSize(LevelInfo{{1920, 1080}, {0, 2, 0}, {1, 1}, 2, 0}, 0) == 0x7f8000);
1198static_assert(CalculateLevelSize(LevelInfo{{32, 32}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x4000);
1199
1200static_assert(CalculateLevelOffset(P::R8_SINT, {1920, 1080}, {0, 2}, 1, 0, 7) == 0x2afc00);
1201static_assert(CalculateLevelOffset(P::ASTC_2D_12X12_UNORM, {8192, 4096}, {0, 2}, 1, 0, 12) ==
1202 0x50d200);
1203
1204static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 0) == 0);
1205static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 1) == 0x400000);
1206static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 2) == 0x500000);
1207static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 3) == 0x540000);
1208static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 4) == 0x550000);
1209static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 5) == 0x554000);
1210static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 6) == 0x555000);
1211static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 7) == 0x555400);
1212static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 8) == 0x555600);
1213static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 9) == 0x555800);
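// Each offset above is the running sum of the previous levels' sizes: level 0 of a
// 1024x1024 A8B8G8R8 image takes 0x400000 bytes (1024 * 1024 * 4), level 1 adds
// 0x100000 (512 * 512 * 4), and so on, until the mips shrink below a single
// 512-byte GOB and every remaining level still consumes 0x200 bytes.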
1214
1215constexpr u32 ValidateLayerSize(PixelFormat format, u32 width, u32 height, u32 block_height,
1216 u32 tile_width_spacing, u32 level) {
1217 const Extent3D size{width, height, 1};
1218 const Extent3D block{0, block_height, 0};
1219 const u32 offset = CalculateLevelOffset(format, size, block, 1, tile_width_spacing, level);
1220 return AlignLayerSize(offset, size, block, DefaultBlockHeight(format), tile_width_spacing);
1221}
1222
1223static_assert(ValidateLayerSize(P::ASTC_2D_12X12_UNORM, 8192, 4096, 2, 0, 12) == 0x50d800);
1224static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 1024, 1024, 2, 0, 10) == 0x556000);
1225static_assert(ValidateLayerSize(P::BC3_UNORM, 128, 128, 2, 0, 8) == 0x6000);
1226
1227static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 518, 572, 4, 3, 1) == 0x190000,
1228 "Tile width spacing is not working");
1229static_assert(ValidateLayerSize(P::BC5_UNORM, 1024, 1024, 3, 4, 11) == 0x160000,
1230 "Compressed tile width spacing is not working");
1231
1232} // namespace VideoCommon
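// Illustrative sketch, not part of this commit: how the size helpers above compose on
// the download path, assuming the caller already has an ImageInfo `info`, a memory
// manager and the image's GPU address. UnswizzleImage fills `data` with linear texel
// rows and returns one BufferImageCopy per mip level locating each level in `data`:
//
//     std::vector<u8> data(VideoCommon::CalculateUnswizzledSizeBytes(info));
//     const std::vector<VideoCommon::BufferImageCopy> copies =
//         VideoCommon::UnswizzleImage(gpu_memory, gpu_addr, info, data);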
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
new file mode 100644
index 000000000..dbbbd33cd
--- /dev/null
+++ b/src/video_core/texture_cache/util.h
@@ -0,0 +1,107 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <span>
9
10#include "common/common_types.h"
11
12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/surface.h"
14#include "video_core/texture_cache/image_base.h"
15#include "video_core/texture_cache/image_view_base.h"
16#include "video_core/texture_cache/types.h"
17#include "video_core/textures/texture.h"
18
19namespace VideoCommon {
20
21using Tegra::Texture::TICEntry;
22
23struct OverlapResult {
24 GPUVAddr gpu_addr;
25 VAddr cpu_addr;
26 SubresourceExtent resources;
27};
28
29[[nodiscard]] u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept;
30
31[[nodiscard]] u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept;
32
33[[nodiscard]] u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept;
34
35[[nodiscard]] u32 CalculateLayerStride(const ImageInfo& info) noexcept;
36
37[[nodiscard]] u32 CalculateLayerSize(const ImageInfo& info) noexcept;
38
39[[nodiscard]] std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets(
40 const ImageInfo& info) noexcept;
41
42[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
43
44[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
45
46[[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level);
47
48[[nodiscard]] VideoCore::Surface::PixelFormat PixelFormatFromTIC(
49 const Tegra::Texture::TICEntry& config) noexcept;
50
51[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept;
52
53[[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst,
54 const ImageInfo& src,
55 SubresourceBase base);
56
57[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
58
59[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
60 GPUVAddr gpu_addr, const ImageInfo& info,
61 std::span<u8> output);
62
63[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
64 const ImageBase& image, std::span<u8> output);
65
66void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
67 std::span<BufferImageCopy> copies);
68
69[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info);
70
71[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);
72
73[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level);
74
75[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info);
76
77void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
78 std::span<const BufferImageCopy> copies, std::span<const u8> memory);
79
80[[nodiscard]] bool IsBlockLinearSizeCompatible(const ImageInfo& new_info,
81 const ImageInfo& overlap_info, u32 new_level,
82 u32 overlap_level, bool strict_size) noexcept;
83
84[[nodiscard]] bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs,
85 bool strict_size) noexcept;
86
87[[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info,
88 GPUVAddr gpu_addr, VAddr cpu_addr,
89 const ImageBase& overlap,
90 bool strict_size);
91
92[[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs);
93
94[[nodiscard]] std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate,
95 const ImageBase& image,
96 GPUVAddr candidate_addr,
97 RelaxedOptions options);
98
99[[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image,
100 GPUVAddr candidate_addr, RelaxedOptions options);
101
102void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
103 const ImageBase* src);
104
105[[nodiscard]] u32 MapSizeBytes(const ImageBase& image);
106
107} // namespace VideoCommon
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 365bde2f1..acd5bdd78 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -18,6 +18,7 @@
 #include <algorithm>
 #include <cassert>
 #include <cstring>
+#include <span>
 #include <vector>

 #include <boost/container/static_vector.hpp>
@@ -600,7 +601,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
     return params;
 }

-static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth,
+static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth,
                               u32 blockHeight) {
     // Don't actually care about the void extent, just read the bits...
     for (s32 i = 0; i < 4; ++i) {
@@ -623,7 +624,7 @@ static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 block
     }
 }

-static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) {
+static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
     for (u32 j = 0; j < blockHeight; j++) {
         for (u32 i = 0; i < blockWidth; i++) {
             outBuf[j * blockWidth + i] = 0xFFFF00FF;
@@ -1438,9 +1439,9 @@ static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues,
 #undef READ_INT_VALUES
 }

-static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight,
-                            u32* outBuf) {
-    InputBitStream strm(inBuf);
+static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
+                            const u32 blockHeight, std::span<u32, 12 * 12> outBuf) {
+    InputBitStream strm(inBuf.data());
     TexelWeightParams weightParams = DecodeBlockInfo(strm);

     // Was there an error?
@@ -1601,8 +1602,8 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
     }

     // Read the texel weight data..
-    u8 texelWeightData[16];
-    memcpy(texelWeightData, inBuf, sizeof(texelWeightData));
+    std::array<u8, 16> texelWeightData;
+    std::ranges::copy(inBuf, texelWeightData.begin());

     // Reverse everything
     for (u32 i = 0; i < 8; i++) {
@@ -1618,14 +1619,15 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32

     // Make sure that higher non-texel bits are set to zero
     const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
-    texelWeightData[clearByteStart - 1] =
-        texelWeightData[clearByteStart - 1] &
-        static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
-    memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
+    if (clearByteStart > 0) {
+        texelWeightData[clearByteStart - 1] &=
+            static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
+    }
+    std::memset(texelWeightData.data() + clearByteStart, 0, std::min(16U - clearByteStart, 16U));

     IntegerEncodedVector texelWeightValues;

-    InputBitStream weightStream(texelWeightData);
+    InputBitStream weightStream(texelWeightData.data());

     DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight,
                           weightParams.GetNumWeightValues());
@@ -1672,36 +1674,32 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32

 namespace Tegra::Texture::ASTC {

-std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width,
-                           u32 block_height) {
-    u32 blockIdx = 0;
+void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
+                uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) {
+    u32 block_index = 0;
     std::size_t depth_offset = 0;
-    std::vector<u8> outData(height * width * depth * 4);
-    for (u32 k = 0; k < depth; k++) {
-        for (u32 j = 0; j < height; j += block_height) {
-            for (u32 i = 0; i < width; i += block_width) {
-
-                const u8* blockPtr = data + blockIdx * 16;
+    for (u32 z = 0; z < depth; z++) {
+        for (u32 y = 0; y < height; y += block_height) {
+            for (u32 x = 0; x < width; x += block_width) {
+                const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)};

                 // Blocks can be at most 12x12
-                u32 uncompData[144];
+                std::array<u32, 12 * 12> uncompData;
                 ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData);

-                u32 decompWidth = std::min(block_width, width - i);
-                u32 decompHeight = std::min(block_height, height - j);
+                u32 decompWidth = std::min(block_width, width - x);
+                u32 decompHeight = std::min(block_height, height - y);

-                u8* outRow = depth_offset + outData.data() + (j * width + i) * 4;
+                const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4);
                 for (u32 jj = 0; jj < decompHeight; jj++) {
-                    memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
+                    std::memcpy(outRow.data() + jj * width * 4,
+                                uncompData.data() + jj * block_width, decompWidth * 4);
                 }
-
-                blockIdx++;
+                ++block_index;
             }
         }
         depth_offset += height * width * 4;
     }
-
-    return outData;
 }

 } // namespace Tegra::Texture::ASTC
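// Illustrative sketch, not part of this commit: with the span-based interface the
// caller now owns the allocation. ASTC always decodes to RGBA8, so the output span
// needs width * height * depth * 4 bytes, assuming a `compressed` input span
// prepared by the caller:
//
//     std::vector<uint8_t> rgba8(std::size_t{width} * height * depth * 4);
//     Tegra::Texture::ASTC::Decompress(compressed, width, height, depth,
//                                      block_width, block_height, rgba8);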
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index 991cdba72..9105119bc 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -5,11 +5,10 @@
 #pragma once

 #include <cstdint>
-#include <vector>

 namespace Tegra::Texture::ASTC {

-std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
-                                uint32_t depth, uint32_t block_width, uint32_t block_height);
+void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
+                uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);

 } // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
deleted file mode 100644
index bd1aebf02..000000000
--- a/src/video_core/textures/convert.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstring>
7#include <tuple>
8#include <vector>
9
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/surface.h"
14#include "video_core/textures/astc.h"
15#include "video_core/textures/convert.h"
16
17namespace Tegra::Texture {
18
19using VideoCore::Surface::PixelFormat;
20
21template <bool reverse>
22void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
23 union S8Z24 {
24 BitField<0, 24, u32> z24;
25 BitField<24, 8, u32> s8;
26 };
27 static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
28
29 union Z24S8 {
30 BitField<0, 8, u32> s8;
31 BitField<8, 24, u32> z24;
32 };
33 static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
34
35 S8Z24 s8z24_pixel{};
36 Z24S8 z24s8_pixel{};
37 constexpr auto bpp{
38 VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM)};
39 for (std::size_t y = 0; y < height; ++y) {
40 for (std::size_t x = 0; x < width; ++x) {
41 const std::size_t offset{bpp * (y * width + x)};
42 if constexpr (reverse) {
43 std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
44 s8z24_pixel.s8.Assign(z24s8_pixel.s8);
45 s8z24_pixel.z24.Assign(z24s8_pixel.z24);
46 std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
47 } else {
48 std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
49 z24s8_pixel.s8.Assign(s8z24_pixel.s8);
50 z24s8_pixel.z24.Assign(s8z24_pixel.z24);
51 std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
52 }
53 }
54 }
55}
56
57static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
58 SwapS8Z24ToZ24S8<false>(data, width, height);
59}
60
61static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) {
62 SwapS8Z24ToZ24S8<true>(data, width, height);
63}
64
65void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, u32 width,
66 u32 height, u32 depth, bool convert_astc, bool convert_s8z24) {
67 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
68 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
69 u32 block_width{};
70 u32 block_height{};
71 std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
72 const std::vector<u8> rgba8_data = Tegra::Texture::ASTC::Decompress(
73 in_data, width, height, depth, block_width, block_height);
74 std::copy(rgba8_data.begin(), rgba8_data.end(), out_data);
75
76 } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) {
77 Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height);
78 }
79}
80
81void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
82 bool convert_astc, bool convert_s8z24) {
83 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
84 LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
85 pixel_format);
86 UNREACHABLE();
87
88 } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) {
89 Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height);
90 }
91}
92
93} // namespace Tegra::Texture
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h
deleted file mode 100644
index d5d6c77bb..000000000
--- a/src/video_core/textures/convert.h
+++ /dev/null
@@ -1,22 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace VideoCore::Surface {
10enum class PixelFormat;
11}
12
13namespace Tegra::Texture {
14
15void ConvertFromGuestToHost(u8* in_data, u8* out_data, VideoCore::Surface::PixelFormat pixel_format,
16 u32 width, u32 height, u32 depth, bool convert_astc,
17 bool convert_s8z24);
18
19void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
20 u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
21
22} // namespace Tegra::Texture
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 16d46a018..9f5181318 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -2,204 +2,111 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <array>
 #include <cmath>
 #include <cstring>
+#include <span>
+#include <utility>
+
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/bit_util.h"
+#include "common/div_ceil.h"
 #include "video_core/gpu.h"
 #include "video_core/textures/decoders.h"
 #include "video_core/textures/texture.h"
 
 namespace Tegra::Texture {
-namespace {
 
+namespace {
 /**
- * This table represents the internal swizzle of a gob,
- * in format 16 bytes x 2 sector packing.
+ * This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing.
  * Calculates the offset of an (x, y) position within a swizzled texture.
  * Taken from the Tegra X1 Technical Reference Manual. pages 1187-1188
  */
-template <std::size_t N, std::size_t M, u32 Align>
-struct alignas(64) SwizzleTable {
-    static_assert(M * Align == 64, "Swizzle Table does not align to GOB");
-    constexpr SwizzleTable() {
-        for (u32 y = 0; y < N; ++y) {
-            for (u32 x = 0; x < M; ++x) {
-                const u32 x2 = x * Align;
-                values[y][x] = static_cast<u16>(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 +
-                                                ((x2 % 32) / 16) * 32 + (y % 2) * 16 + (x2 % 16));
-            }
+constexpr SwizzleTable MakeSwizzleTableConst() {
+    SwizzleTable table{};
+    for (u32 y = 0; y < table.size(); ++y) {
+        for (u32 x = 0; x < table[0].size(); ++x) {
+            table[y][x] = ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 +
+                          (y % 2) * 16 + (x % 16);
         }
     }
-    const std::array<u16, M>& operator[](std::size_t index) const {
-        return values[index];
-    }
-    std::array<std::array<u16, M>, N> values{};
-};
+    return table;
+}
 
-constexpr u32 FAST_SWIZZLE_ALIGN = 16;
+constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTableConst();
 
-constexpr auto LEGACY_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_X, GOB_SIZE_X, GOB_SIZE_Z>();
-constexpr auto FAST_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_Y, 4, FAST_SWIZZLE_ALIGN>();
+template <bool TO_LINEAR>
+void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
+             u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
+    // The origin of the transformation can be configured here, leave it as zero as the current API
+    // doesn't expose it.
+    static constexpr u32 origin_x = 0;
+    static constexpr u32 origin_y = 0;
+    static constexpr u32 origin_z = 0;
 
-/**
- * This function manages ALL the GOBs(Group of Bytes) Inside a single block.
- * Instead of going gob by gob, we map the coordinates inside a block and manage from
- * those. Block_Width is assumed to be 1.
- */
-void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
-                         const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
-                         const u32 y_end, const u32 z_end, const u32 tile_offset,
-                         const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
-                         const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) {
-    std::array<u8*, 2> data_ptrs;
-    u32 z_address = tile_offset;
-
-    for (u32 z = z_start; z < z_end; z++) {
-        u32 y_address = z_address;
-        u32 pixel_base = layer_z * z + y_start * stride_x;
-        for (u32 y = y_start; y < y_end; y++) {
-            const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y];
-            for (u32 x = x_start; x < x_end; x++) {
-                const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % GOB_SIZE_X]};
-                const u32 pixel_index{x * out_bytes_per_pixel + pixel_base};
-                data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
-                data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
-                std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
-            }
-            pixel_base += stride_x;
-            if ((y + 1) % GOB_SIZE_Y == 0)
-                y_address += GOB_SIZE;
-        }
-        z_address += xy_block_size;
-    }
-}
+    // We can configure here a custom pitch
+    // As it's not exposed 'width * bpp' will be the expected pitch.
+    const u32 pitch = width * bytes_per_pixel;
+    const u32 stride = Common::AlignBits(width, stride_alignment) * bytes_per_pixel;
 
-/**
- * This function manages ALL the GOBs(Group of Bytes) Inside a single block.
- * Instead of going gob by gob, we map the coordinates inside a block and manage from
- * those. Block_Width is assumed to be 1.
- */
-void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
-                      const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
-                      const u32 y_end, const u32 z_end, const u32 tile_offset,
-                      const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
-                      const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) {
-    std::array<u8*, 2> data_ptrs;
-    u32 z_address = tile_offset;
-    const u32 x_startb = x_start * bytes_per_pixel;
-    const u32 x_endb = x_end * bytes_per_pixel;
-
-    for (u32 z = z_start; z < z_end; z++) {
-        u32 y_address = z_address;
-        u32 pixel_base = layer_z * z + y_start * stride_x;
-        for (u32 y = y_start; y < y_end; y++) {
-            const auto& table = FAST_SWIZZLE_TABLE[y % GOB_SIZE_Y];
-            for (u32 xb = x_startb; xb < x_endb; xb += FAST_SWIZZLE_ALIGN) {
-                const u32 swizzle_offset{y_address + table[(xb / FAST_SWIZZLE_ALIGN) % 4]};
-                const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
-                const u32 pixel_index{out_x + pixel_base};
-                data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
-                data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
-                std::memcpy(data_ptrs[0], data_ptrs[1], FAST_SWIZZLE_ALIGN);
-            }
-            pixel_base += stride_x;
-            if ((y + 1) % GOB_SIZE_Y == 0)
-                y_address += GOB_SIZE;
-        }
-        z_address += xy_block_size;
-    }
-}
+    const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
+    const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
+    const u32 slice_size =
+        Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size;
 
-/**
- * This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Textue.
- * The body of this function takes care of splitting the swizzled texture into blocks,
- * and managing the extents of it. Once all the parameters of a single block are obtained,
- * the function calls 'ProcessBlock' to process that particular Block.
- *
- * Documentation for the memory layout and decoding can be found at:
- * https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces
- */
-template <bool fast>
-void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
-                  const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel,
-                  const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth,
-                  const u32 width_spacing) {
-    auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
-    const u32 stride_x = width * out_bytes_per_pixel;
-    const u32 layer_z = height * stride_x;
-    const u32 gob_elements_x = GOB_SIZE_X / bytes_per_pixel;
-    constexpr u32 gob_elements_y = GOB_SIZE_Y;
-    constexpr u32 gob_elements_z = GOB_SIZE_Z;
-    const u32 block_x_elements = gob_elements_x;
-    const u32 block_y_elements = gob_elements_y * block_height;
-    const u32 block_z_elements = gob_elements_z * block_depth;
-    const u32 aligned_width = Common::AlignUp(width, gob_elements_x * width_spacing);
-    const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements);
-    const u32 blocks_on_y = div_ceil(height, block_y_elements);
-    const u32 blocks_on_z = div_ceil(depth, block_z_elements);
-    const u32 xy_block_size = GOB_SIZE * block_height;
-    const u32 block_size = xy_block_size * block_depth;
-    u32 tile_offset = 0;
-    for (u32 zb = 0; zb < blocks_on_z; zb++) {
-        const u32 z_start = zb * block_z_elements;
-        const u32 z_end = std::min(depth, z_start + block_z_elements);
-        for (u32 yb = 0; yb < blocks_on_y; yb++) {
-            const u32 y_start = yb * block_y_elements;
-            const u32 y_end = std::min(height, y_start + block_y_elements);
-            for (u32 xb = 0; xb < blocks_on_x; xb++) {
-                const u32 x_start = xb * block_x_elements;
-                const u32 x_end = std::min(width, x_start + block_x_elements);
-                if constexpr (fast) {
-                    FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
-                                     z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
-                                     layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
-                } else {
-                    PreciseProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
-                                        z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
-                                        layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
-                }
-                tile_offset += block_size;
+    const u32 block_height_mask = (1U << block_height) - 1;
+    const u32 block_depth_mask = (1U << block_depth) - 1;
+    const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth;
+
+    for (u32 slice = 0; slice < depth; ++slice) {
+        const u32 z = slice + origin_z;
+        const u32 offset_z = (z >> block_depth) * slice_size +
+                             ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height));
+        for (u32 line = 0; line < height; ++line) {
+            const u32 y = line + origin_y;
+            const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y];
+
+            const u32 block_y = y >> GOB_SIZE_Y_SHIFT;
+            const u32 offset_y = (block_y >> block_height) * block_size +
+                                 ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
+
+            for (u32 column = 0; column < width; ++column) {
+                const u32 x = (column + origin_x) * bytes_per_pixel;
+                const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift;
+
+                const u32 base_swizzled_offset = offset_z + offset_y + offset_x;
+                const u32 swizzled_offset = base_swizzled_offset + table[x % GOB_SIZE_X];
+
+                const u32 unswizzled_offset =
+                    slice * pitch * height + line * pitch + column * bytes_per_pixel;
+
+                u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset];
+                const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset];
+                std::memcpy(dst, src, bytes_per_pixel);
             }
         }
     }
 }
-
 } // Anonymous namespace
 
-void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
-                      u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data,
-                      bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) {
-    const u32 block_height_size{1U << block_height};
-    const u32 block_depth_size{1U << block_depth};
-    if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % FAST_SWIZZLE_ALIGN == 0) {
-        SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
-                           bytes_per_pixel, out_bytes_per_pixel, block_height_size,
-                           block_depth_size, width_spacing);
-    } else {
-        SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
-                            bytes_per_pixel, out_bytes_per_pixel, block_height_size,
-                            block_depth_size, width_spacing);
-    }
+SwizzleTable MakeSwizzleTable() {
+    return SWIZZLE_TABLE;
 }
 
-void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
-                      u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height,
-                      u32 block_depth, u32 width_spacing) {
-    CopySwizzledData((width + tile_size_x - 1) / tile_size_x,
-                     (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel,
-                     bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth,
-                     width_spacing);
+void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
+                      u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
+                      u32 stride_alignment) {
+    Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
+                   stride_alignment);
 }
 
-std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
-                                 u32 width, u32 height, u32 depth, u32 block_height,
-                                 u32 block_depth, u32 width_spacing) {
-    std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel);
-    UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel,
-                     width, height, depth, block_height, block_depth, width_spacing);
-    return unswizzled_data;
+void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
+                    u32 height, u32 depth, u32 block_height, u32 block_depth,
+                    u32 stride_alignment) {
+    Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
+                  stride_alignment);
 }
 
 void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
@@ -213,7 +120,7 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
     const u32 gob_address_y =
         (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
         ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
-    const auto& table = LEGACY_SWIZZLE_TABLE[dst_y % GOB_SIZE_Y];
+    const auto& table = SWIZZLE_TABLE[dst_y % GOB_SIZE_Y];
     for (u32 x = 0; x < subrect_width; ++x) {
         const u32 dst_x = x + offset_x;
         const u32 gob_address =
@@ -235,11 +142,11 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width,
     const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);
 
     const u32 block_height_mask = (1U << block_height) - 1;
-    const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height;
+    const u32 x_shift = GOB_SIZE_SHIFT + block_height;
 
     for (u32 line = 0; line < line_count; ++line) {
         const u32 src_y = line + origin_y;
-        const auto& table = LEGACY_SWIZZLE_TABLE[src_y % GOB_SIZE_Y];
+        const auto& table = SWIZZLE_TABLE[src_y % GOB_SIZE_Y];
 
         const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;
         const u32 src_offset_y = (block_y >> block_height) * block_size +
@@ -270,7 +177,7 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt
     const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
 
     for (u32 line = 0; line < line_count; ++line) {
-        const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y];
+        const auto& table = SWIZZLE_TABLE[line % GOB_SIZE_Y];
         const u32 block_y = line / GOB_SIZE_Y;
         const u32 dst_offset_y =
             (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE;
@@ -293,7 +200,7 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32
     const std::size_t gob_address_y =
         (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
         ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
-    const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y];
+    const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y];
    for (std::size_t x = dst_x; x < width && count < copy_size; ++x) {
         const std::size_t gob_address =
             gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height;
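
The constexpr table above bakes the Tegra X1 GOB (group of bytes) bit interleaving into an 8x64 lookup. A self-contained sketch of the same formula, useful for sanity-checking offsets by hand; the constants mirror decoders.h and the sample coordinate is arbitrary:

// Standalone illustration of the GOB swizzle formula behind SWIZZLE_TABLE.
// A GOB is 64 bytes wide and 8 rows tall (512 bytes); table[y][x] is the byte
// offset of linear position (x, y) inside one swizzled GOB.
#include <array>
#include <cstdint>
#include <cstdio>

constexpr uint32_t GOB_SIZE_X = 64;
constexpr uint32_t GOB_SIZE_Y = 8;

using SwizzleTable = std::array<std::array<uint32_t, GOB_SIZE_X>, GOB_SIZE_Y>;

constexpr SwizzleTable MakeTable() {
    SwizzleTable table{};
    for (uint32_t y = 0; y < GOB_SIZE_Y; ++y) {
        for (uint32_t x = 0; x < GOB_SIZE_X; ++x) {
            // Same expression as MakeSwizzleTableConst in the diff above.
            table[y][x] = ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 +
                          ((x % 32) / 16) * 32 + (y % 2) * 16 + (x % 16);
        }
    }
    return table;
}

int main() {
    constexpr SwizzleTable table = MakeTable();
    // (x=17, y=3): 0*256 + 1*64 + 1*32 + 1*16 + 1 = 113
    std::printf("offset of byte (17, 3) inside a GOB: %u\n", table[3][17]);
}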
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 01e156bc8..d7cdc81e8 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -4,7 +4,8 @@
 
 #pragma once
 
-#include <vector>
+#include <span>
+
 #include "common/common_types.h"
 #include "video_core/textures/texture.h"
 
@@ -15,28 +16,25 @@ constexpr u32 GOB_SIZE_Y = 8;
 constexpr u32 GOB_SIZE_Z = 1;
 constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
 
-constexpr std::size_t GOB_SIZE_X_SHIFT = 6;
-constexpr std::size_t GOB_SIZE_Y_SHIFT = 3;
-constexpr std::size_t GOB_SIZE_Z_SHIFT = 0;
-constexpr std::size_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
+constexpr u32 GOB_SIZE_X_SHIFT = 6;
+constexpr u32 GOB_SIZE_Y_SHIFT = 3;
+constexpr u32 GOB_SIZE_Z_SHIFT = 0;
+constexpr u32 GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
 
-/// Unswizzles a swizzled texture without changing its format.
-void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
-                      u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
-                      u32 block_height = TICEntry::DefaultBlockHeight,
-                      u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
-
-/// Unswizzles a swizzled texture without changing its format.
-std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
-                                 u32 width, u32 height, u32 depth,
-                                 u32 block_height = TICEntry::DefaultBlockHeight,
-                                 u32 block_depth = TICEntry::DefaultBlockHeight,
-                                 u32 width_spacing = 0);
-
-/// Copies texture data from a buffer and performs swizzling/unswizzling as necessary.
-void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
-                      u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
-                      bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
+using SwizzleTable = std::array<std::array<u32, GOB_SIZE_X>, GOB_SIZE_Y>;
+
+/// Returns a z-order swizzle table
+SwizzleTable MakeSwizzleTable();
+
+/// Unswizzles a block linear texture into linear memory.
+void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
+                      u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
+                      u32 stride_alignment = 1);
+
+/// Swizzles linear memory into a block linear texture.
+void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
+                    u32 height, u32 depth, u32 block_height, u32 block_depth,
+                    u32 stride_alignment = 1);
 
 /// This function calculates the correct size of a texture depending if it's tiled or not.
 std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
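
A hypothetical round trip through the span-based API declared above. The dimensions, bytes-per-pixel value, and log2 block dimensions are illustrative; a real caller would size the guest buffer with CalculateSize:

// Sketch only: unswizzle a block linear texture to linear memory and back.
// block_height/block_depth are log2 GOB counts (4 -> 16 GOBs per block tall),
// matching how Swizzle<> shifts by them in decoders.cpp.
#include <span>
#include <vector>
#include "common/common_types.h"
#include "video_core/textures/decoders.h"

void RoundTrip(std::span<const u8> guest, std::span<u8> guest_out) {
    const u32 width = 128;
    const u32 height = 128;
    const u32 depth = 1;
    const u32 bytes_per_pixel = 4; // e.g. RGBA8
    const u32 block_height = 4;    // 16 GOBs per block vertically
    const u32 block_depth = 0;     // 2D texture, one GOB deep

    std::vector<u8> linear(std::size_t{width} * height * depth * bytes_per_pixel);
    Tegra::Texture::UnswizzleTexture(linear, guest, bytes_per_pixel, width, height, depth,
                                     block_height, block_depth);
    Tegra::Texture::SwizzleTexture(guest_out, linear, bytes_per_pixel, width, height, depth,
                                   block_height, block_depth);
}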
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp
index 4171e3ef2..ae5621a7d 100644
--- a/src/video_core/textures/texture.cpp
+++ b/src/video_core/textures/texture.cpp
@@ -5,9 +5,13 @@
 #include <algorithm>
 #include <array>
 
+#include "common/cityhash.h"
 #include "core/settings.h"
 #include "video_core/textures/texture.h"
 
+using Tegra::Texture::TICEntry;
+using Tegra::Texture::TSCEntry;
+
 namespace Tegra::Texture {
 
 namespace {
@@ -65,7 +69,7 @@ unsigned SettingsMinimumAnisotropy() noexcept {
 
 } // Anonymous namespace
 
-std::array<float, 4> TSCEntry::GetBorderColor() const noexcept {
+std::array<float, 4> TSCEntry::BorderColor() const noexcept {
     if (!srgb_conversion) {
         return border_color;
     }
@@ -73,8 +77,16 @@ std::array<float, 4> TSCEntry::GetBorderColor() const noexcept {
            SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};
 }
 
-float TSCEntry::GetMaxAnisotropy() const noexcept {
+float TSCEntry::MaxAnisotropy() const noexcept {
     return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy()));
 }
 
 } // namespace Tegra::Texture
+
+size_t std::hash<TICEntry>::operator()(const TICEntry& tic) const noexcept {
+    return Common::CityHash64(reinterpret_cast<const char*>(&tic), sizeof tic);
+}
+
+size_t std::hash<TSCEntry>::operator()(const TSCEntry& tsc) const noexcept {
+    return Common::CityHash64(reinterpret_cast<const char*>(&tsc), sizeof tsc);
+}
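
The std::hash definitions above digest the raw descriptor bytes with CityHash64. A sketch of the kind of use they enable; the map and function are assumptions for illustration, not code from this commit:

// Hypothetical descriptor deduplication keyed directly on TICEntry.
// operator== compares the raw std::array<u64, 4>, and std::hash hashes the
// same 32 bytes, so equal keys hash equally, as unordered_map requires.
#include <cstddef>
#include <unordered_map>
#include "video_core/textures/texture.h"

using Tegra::Texture::TICEntry;

std::size_t FindOrEmplaceImageView(std::unordered_map<TICEntry, std::size_t>& map,
                                   const TICEntry& tic) {
    const auto [it, inserted] = map.try_emplace(tic, map.size());
    return it->second; // existing id, or a fresh one if this TIC is new
}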
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index bbc7e3eaf..c1d14335e 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -53,27 +53,27 @@ enum class TextureFormat : u32 {
     BC4 = 0x27,
     BC5 = 0x28,
     S8D24 = 0x29,
-    X8Z24 = 0x2a,
+    X8D24 = 0x2a,
     D24S8 = 0x2b,
-    X4V4Z24__COV4R4V = 0x2c,
-    X4V4Z24__COV8R8V = 0x2d,
-    V8Z24__COV4R12V = 0x2e,
+    X4V4D24__COV4R4V = 0x2c,
+    X4V4D24__COV8R8V = 0x2d,
+    V8D24__COV4R12V = 0x2e,
     D32 = 0x2f,
     D32S8 = 0x30,
-    X8Z24_X20V4S8__COV4R4V = 0x31,
-    X8Z24_X20V4S8__COV8R8V = 0x32,
-    ZF32_X20V4X8__COV4R4V = 0x33,
-    ZF32_X20V4X8__COV8R8V = 0x34,
-    ZF32_X20V4S8__COV4R4V = 0x35,
-    ZF32_X20V4S8__COV8R8V = 0x36,
-    X8Z24_X16V8S8__COV4R12V = 0x37,
-    ZF32_X16V8X8__COV4R12V = 0x38,
-    ZF32_X16V8S8__COV4R12V = 0x39,
+    X8D24_X20V4S8__COV4R4V = 0x31,
+    X8D24_X20V4S8__COV8R8V = 0x32,
+    D32_X20V4X8__COV4R4V = 0x33,
+    D32_X20V4X8__COV8R8V = 0x34,
+    D32_X20V4S8__COV4R4V = 0x35,
+    D32_X20V4S8__COV8R8V = 0x36,
+    X8D24_X16V8S8__COV4R12V = 0x37,
+    D32_X16V8X8__COV4R12V = 0x38,
+    D32_X16V8S8__COV4R12V = 0x39,
     D16 = 0x3a,
-    V8Z24__COV8R24V = 0x3b,
-    X8Z24_X16V8S8__COV8R24V = 0x3c,
-    ZF32_X16V8X8__COV8R24V = 0x3d,
-    ZF32_X16V8S8__COV8R24V = 0x3e,
+    V8D24__COV8R24V = 0x3b,
+    X8D24_X16V8S8__COV8R24V = 0x3c,
+    D32_X16V8X8__COV8R24V = 0x3d,
+    D32_X16V8S8__COV8R24V = 0x3e,
     ASTC_2D_4X4 = 0x40,
     ASTC_2D_5X5 = 0x41,
     ASTC_2D_6X6 = 0x42,
@@ -146,7 +146,7 @@ enum class MsaaMode : u32 {
 };
 
 union TextureHandle {
-    /* implicit */ TextureHandle(u32 raw_) : raw{raw_} {}
+    /* implicit */ constexpr TextureHandle(u32 raw_) : raw{raw_} {}
 
     u32 raw;
     BitField<0, 20, u32> tic_id;
@@ -155,124 +155,124 @@ union TextureHandle {
 static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
 
 struct TICEntry {
-    static constexpr u32 DefaultBlockHeight = 16;
-    static constexpr u32 DefaultBlockDepth = 1;
-
-    union {
-        u32 raw;
-        BitField<0, 7, TextureFormat> format;
-        BitField<7, 3, ComponentType> r_type;
-        BitField<10, 3, ComponentType> g_type;
-        BitField<13, 3, ComponentType> b_type;
-        BitField<16, 3, ComponentType> a_type;
-
-        BitField<19, 3, SwizzleSource> x_source;
-        BitField<22, 3, SwizzleSource> y_source;
-        BitField<25, 3, SwizzleSource> z_source;
-        BitField<28, 3, SwizzleSource> w_source;
-    };
-    u32 address_low;
     union {
-        BitField<0, 16, u32> address_high;
-        BitField<21, 3, TICHeaderVersion> header_version;
-    };
-    union {
-        BitField<0, 3, u32> block_width;
-        BitField<3, 3, u32> block_height;
-        BitField<6, 3, u32> block_depth;
+        struct {
+            union {
+                BitField<0, 7, TextureFormat> format;
+                BitField<7, 3, ComponentType> r_type;
+                BitField<10, 3, ComponentType> g_type;
+                BitField<13, 3, ComponentType> b_type;
+                BitField<16, 3, ComponentType> a_type;
+
+                BitField<19, 3, SwizzleSource> x_source;
+                BitField<22, 3, SwizzleSource> y_source;
+                BitField<25, 3, SwizzleSource> z_source;
+                BitField<28, 3, SwizzleSource> w_source;
+            };
+            u32 address_low;
+            union {
+                BitField<0, 16, u32> address_high;
+                BitField<16, 5, u32> layer_base_3_7;
+                BitField<21, 3, TICHeaderVersion> header_version;
+                BitField<24, 1, u32> load_store_hint;
+                BitField<25, 4, u32> view_coherency_hash;
+                BitField<29, 3, u32> layer_base_8_10;
+            };
+            union {
+                BitField<0, 3, u32> block_width;
+                BitField<3, 3, u32> block_height;
+                BitField<6, 3, u32> block_depth;
 
-        BitField<10, 3, u32> tile_width_spacing;
+                BitField<10, 3, u32> tile_width_spacing;
 
-        // High 16 bits of the pitch value
-        BitField<0, 16, u32> pitch_high;
-        BitField<26, 1, u32> use_header_opt_control;
-        BitField<27, 1, u32> depth_texture;
-        BitField<28, 4, u32> max_mip_level;
+                // High 16 bits of the pitch value
+                BitField<0, 16, u32> pitch_high;
+                BitField<26, 1, u32> use_header_opt_control;
+                BitField<27, 1, u32> depth_texture;
+                BitField<28, 4, u32> max_mip_level;
 
-        BitField<0, 16, u32> buffer_high_width_minus_one;
-    };
-    union {
-        BitField<0, 16, u32> width_minus_1;
-        BitField<22, 1, u32> srgb_conversion;
-        BitField<23, 4, TextureType> texture_type;
-        BitField<29, 3, u32> border_size;
+                BitField<0, 16, u32> buffer_high_width_minus_one;
+            };
+            union {
+                BitField<0, 16, u32> width_minus_one;
+                BitField<16, 3, u32> layer_base_0_2;
+                BitField<22, 1, u32> srgb_conversion;
+                BitField<23, 4, TextureType> texture_type;
+                BitField<29, 3, u32> border_size;
 
-        BitField<0, 16, u32> buffer_low_width_minus_one;
-    };
-    union {
-        BitField<0, 16, u32> height_minus_1;
-        BitField<16, 14, u32> depth_minus_1;
-    };
-    union {
-        BitField<6, 13, u32> mip_lod_bias;
-        BitField<27, 3, u32> max_anisotropy;
+                BitField<0, 16, u32> buffer_low_width_minus_one;
+            };
+            union {
+                BitField<0, 16, u32> height_minus_1;
+                BitField<16, 14, u32> depth_minus_1;
+                BitField<30, 1, u32> is_sparse;
+                BitField<31, 1, u32> normalized_coords;
+            };
+            union {
+                BitField<6, 13, u32> mip_lod_bias;
+                BitField<27, 3, u32> max_anisotropy;
+            };
+            union {
+                BitField<0, 4, u32> res_min_mip_level;
+                BitField<4, 4, u32> res_max_mip_level;
+                BitField<8, 4, MsaaMode> msaa_mode;
+                BitField<12, 12, u32> min_lod_clamp;
+            };
+        };
+        std::array<u64, 4> raw;
     };
 
-    union {
-        BitField<0, 4, u32> res_min_mip_level;
-        BitField<4, 4, u32> res_max_mip_level;
-        BitField<8, 4, MsaaMode> msaa_mode;
-        BitField<12, 12, u32> min_lod_clamp;
-    };
+    constexpr bool operator==(const TICEntry& rhs) const noexcept {
+        return raw == rhs.raw;
+    }
 
-    GPUVAddr Address() const {
+    constexpr bool operator!=(const TICEntry& rhs) const noexcept {
+        return raw != rhs.raw;
+    }
+
+    constexpr GPUVAddr Address() const {
         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
     }
 
-    u32 Pitch() const {
+    constexpr u32 Pitch() const {
         ASSERT(header_version == TICHeaderVersion::Pitch ||
                header_version == TICHeaderVersion::PitchColorKey);
         // The pitch value is 21 bits, and is 32B aligned.
         return pitch_high << 5;
     }
 
-    u32 Width() const {
+    constexpr u32 Width() const {
         if (header_version != TICHeaderVersion::OneDBuffer) {
-            return width_minus_1 + 1;
+            return width_minus_one + 1;
         }
-        return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1;
+        return (buffer_high_width_minus_one << 16 | buffer_low_width_minus_one) + 1;
     }
 
-    u32 Height() const {
+    constexpr u32 Height() const {
         return height_minus_1 + 1;
     }
 
-    u32 Depth() const {
+    constexpr u32 Depth() const {
         return depth_minus_1 + 1;
     }
 
-    u32 BlockWidth() const {
-        ASSERT(IsTiled());
-        return block_width;
-    }
-
-    u32 BlockHeight() const {
-        ASSERT(IsTiled());
-        return block_height;
-    }
-
-    u32 BlockDepth() const {
-        ASSERT(IsTiled());
-        return block_depth;
+    constexpr u32 BaseLayer() const {
+        return layer_base_0_2 | layer_base_3_7 << 3 | layer_base_8_10 << 8;
     }
 
-    bool IsTiled() const {
+    constexpr bool IsBlockLinear() const {
         return header_version == TICHeaderVersion::BlockLinear ||
                header_version == TICHeaderVersion::BlockLinearColorKey;
     }
 
-    bool IsLineal() const {
+    constexpr bool IsPitchLinear() const {
         return header_version == TICHeaderVersion::Pitch ||
                header_version == TICHeaderVersion::PitchColorKey;
     }
 
-    bool IsBuffer() const {
+    constexpr bool IsBuffer() const {
         return header_version == TICHeaderVersion::OneDBuffer;
     }
-
-    bool IsSrgbConversionEnabled() const {
-        return srgb_conversion != 0;
-    }
 };
 static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size");
 
@@ -309,6 +309,12 @@ enum class TextureMipmapFilter : u32 {
     Linear = 3,
 };
 
+enum class SamplerReduction : u32 {
+    WeightedAverage = 0,
+    Min = 1,
+    Max = 2,
+};
+
 enum class Anisotropy {
     Default,
     Filter2x,
@@ -333,8 +339,12 @@ struct TSCEntry {
                 BitField<0, 2, TextureFilter> mag_filter;
                 BitField<4, 2, TextureFilter> min_filter;
                 BitField<6, 2, TextureMipmapFilter> mipmap_filter;
+                BitField<8, 1, u32> cubemap_anisotropy;
                 BitField<9, 1, u32> cubemap_interface_filtering;
+                BitField<10, 2, SamplerReduction> reduction_filter;
                 BitField<12, 13, u32> mip_lod_bias;
+                BitField<25, 1, u32> float_coord_normalization;
+                BitField<26, 5, u32> trilin_opt;
             };
             union {
                 BitField<0, 12, u32> min_lod_clamp;
@@ -347,32 +357,45 @@ struct TSCEntry {
             };
             std::array<f32, 4> border_color;
         };
-        std::array<u8, 0x20> raw;
+        std::array<u64, 4> raw;
     };
 
-    std::array<float, 4> GetBorderColor() const noexcept;
+    constexpr bool operator==(const TSCEntry& rhs) const noexcept {
+        return raw == rhs.raw;
+    }
+
+    constexpr bool operator!=(const TSCEntry& rhs) const noexcept {
+        return raw != rhs.raw;
+    }
+
+    std::array<float, 4> BorderColor() const noexcept;
 
-    float GetMaxAnisotropy() const noexcept;
+    float MaxAnisotropy() const noexcept;
 
-    float GetMinLod() const {
+    float MinLod() const {
         return static_cast<float>(min_lod_clamp) / 256.0f;
     }
 
-    float GetMaxLod() const {
+    float MaxLod() const {
         return static_cast<float>(max_lod_clamp) / 256.0f;
     }
 
-    float GetLodBias() const {
+    float LodBias() const {
         // Sign extend the 13-bit value.
-        constexpr u32 mask = 1U << (13 - 1);
+        static constexpr u32 mask = 1U << (13 - 1);
         return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f;
     }
 };
 static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
 
-struct FullTextureInfo {
-    TICEntry tic;
-    TSCEntry tsc;
+} // namespace Tegra::Texture
+
+template <>
+struct std::hash<Tegra::Texture::TICEntry> {
+    size_t operator()(const Tegra::Texture::TICEntry& tic) const noexcept;
 };
 
-} // namespace Tegra::Texture
+template <>
+struct std::hash<Tegra::Texture::TSCEntry> {
+    size_t operator()(const Tegra::Texture::TSCEntry& tsc) const noexcept;
+};
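
A worked example of the 13-bit sign extension inside LodBias() above; the sample encodings are arbitrary:

// mip_lod_bias is a 13-bit two's complement value in 1/256 steps.
// XOR-ing with the sign bit and then subtracting it sign-extends to 32 bits.
#include <cstdint>
#include <cstdio>

float DecodeLodBias(uint32_t mip_lod_bias) {
    constexpr uint32_t mask = 1U << (13 - 1); // bit 12, the sign bit
    const int32_t extended = static_cast<int32_t>((mip_lod_bias ^ mask) - mask);
    return static_cast<float>(extended) / 256.0f;
}

int main() {
    std::printf("%+.2f\n", DecodeLodBias(0x1F00)); // 0x1F00 -> -256/256 = -1.00
    std::printf("%+.2f\n", DecodeLodBias(0x0100)); // 0x0100 -> +256/256 = +1.00
}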